diff options
author | Ulyana Trafimovich <skvadrik@google.com> | 2020-10-28 15:59:29 +0000 |
---|---|---|
committer | Ulyana Trafimovich <skvadrik@google.com> | 2020-10-28 16:29:13 +0000 |
commit | 0a75ba66aa15ea1cdb3f57d0efd4ce7e7c14d45f (patch) | |
tree | c400a04968e71b03b008e62e77419c9036590b27 /src | |
parent | d50509637394306f9d075ed03556671c5e7df138 (diff) | |
download | vixl-0a75ba66aa15ea1cdb3f57d0efd4ce7e7c14d45f.tar.gz |
Revert "Merge remote-tracking branch 'aosp/upstream-master' into..."
Revert "ART: Fix breaking changes from recent VIXL update."
Revert submission 1331125-VIXL_UPDATE_SVE
Reason for revert: broken build git_master-art-host/art-gtest-heap-poisoning @ 6936943
Reverted Changes:
Ic10af84a0:Merge remote-tracking branch 'aosp/upstream-master...
I752a0b0ba:ART: Fix breaking changes from recent VIXL update....
Bug: 171879890
Change-Id: I6f0f5d1e176e2069301685eeb95a3c76364226ff
Diffstat (limited to 'src')
36 files changed, 4027 insertions, 35236 deletions
diff --git a/src/aarch32/disasm-aarch32.cc b/src/aarch32/disasm-aarch32.cc index 535f60c8..9ed3a831 100644 --- a/src/aarch32/disasm-aarch32.cc +++ b/src/aarch32/disasm-aarch32.cc @@ -8288,13 +8288,13 @@ void Disassembler::DecodeT32(uint32_t instr) { UnallocatedT32(instr); return; } - unsigned first_cond = (instr >> 20) & 0xf; + unsigned firstcond = (instr >> 20) & 0xf; unsigned mask = (instr >> 16) & 0xf; - bool was_in_it_block = InITBlock(); - SetIT(Condition(first_cond), mask); - it(Condition(first_cond), mask); - if (was_in_it_block || (first_cond == 15) || - ((first_cond == al) && + bool wasInITBlock = InITBlock(); + SetIT(Condition(firstcond), mask); + it(Condition(firstcond), mask); + if (wasInITBlock || (firstcond == 15) || + ((firstcond == al) && (BitCount(Uint32(mask)) != 1))) { UnpredictableT32(instr); } diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h index 6d76642f..d0ff52b3 100644 --- a/src/aarch32/macro-assembler-aarch32.h +++ b/src/aarch32/macro-assembler-aarch32.h @@ -268,8 +268,7 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE), pool_end_(NULL) { #ifdef VIXL_DEBUG - SetAllowMacroInstructions( // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - true); + SetAllowMacroInstructions(true); #else USE(allow_macro_instructions_); #endif @@ -284,8 +283,7 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE), pool_end_(NULL) { #ifdef VIXL_DEBUG - SetAllowMacroInstructions( // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - true); + SetAllowMacroInstructions(true); #endif } MacroAssembler(byte* buffer, size_t size, InstructionSet isa = kDefaultISA) @@ -298,8 +296,7 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE), pool_end_(NULL) { #ifdef 
VIXL_DEBUG - SetAllowMacroInstructions( // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - true); + SetAllowMacroInstructions(true); #endif } diff --git a/src/aarch32/operands-aarch32.h b/src/aarch32/operands-aarch32.h index 2b452958..1d18bfd3 100644 --- a/src/aarch32/operands-aarch32.h +++ b/src/aarch32/operands-aarch32.h @@ -54,16 +54,28 @@ class Operand { // This is allowed to be an implicit constructor because Operand is // a wrapper class that doesn't normally perform any type conversion. Operand(uint32_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {} + : imm_(immediate), + rm_(NoReg), + shift_(LSL), + amount_(0), + rs_(NoReg) {} Operand(int32_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {} + : imm_(immediate), + rm_(NoReg), + shift_(LSL), + amount_(0), + rs_(NoReg) {} // rm // where rm is the base register // This is allowed to be an implicit constructor because Operand is // a wrapper class that doesn't normally perform any type conversion. Operand(Register rm) // NOLINT(runtime/explicit) - : imm_(0), rm_(rm), shift_(LSL), amount_(0), rs_(NoReg) { + : imm_(0), + rm_(rm), + shift_(LSL), + amount_(0), + rs_(NoReg) { VIXL_ASSERT(rm_.IsValid()); } @@ -233,18 +245,22 @@ class NeonImmediate { // This is allowed to be an implicit constructor because NeonImmediate is // a wrapper class that doesn't normally perform any type conversion. NeonImmediate(uint32_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), immediate_type_(I32) {} + : imm_(immediate), + immediate_type_(I32) {} NeonImmediate(int immediate) // NOLINT(runtime/explicit) - : imm_(immediate), immediate_type_(I32) {} + : imm_(immediate), + immediate_type_(I32) {} // { #<immediate> } // where <immediate> is a 64 bit number // This is allowed to be an implicit constructor because NeonImmediate is // a wrapper class that doesn't normally perform any type conversion. 
NeonImmediate(int64_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), immediate_type_(I64) {} + : imm_(immediate), + immediate_type_(I64) {} NeonImmediate(uint64_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), immediate_type_(I64) {} + : imm_(immediate), + immediate_type_(I64) {} // { #<immediate> } // where <immediate> is a non zero floating point number which can be encoded @@ -252,9 +268,11 @@ class NeonImmediate { // This is allowed to be an implicit constructor because NeonImmediate is // a wrapper class that doesn't normally perform any type conversion. NeonImmediate(float immediate) // NOLINT(runtime/explicit) - : imm_(immediate), immediate_type_(F32) {} + : imm_(immediate), + immediate_type_(F32) {} NeonImmediate(double immediate) // NOLINT(runtime/explicit) - : imm_(immediate), immediate_type_(F64) {} + : imm_(immediate), + immediate_type_(F64) {} NeonImmediate(const NeonImmediate& src) : imm_(src.imm_), immediate_type_(src.immediate_type_) {} @@ -356,21 +374,29 @@ std::ostream& operator<<(std::ostream& os, const NeonImmediate& operand); class NeonOperand { public: NeonOperand(int32_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoDReg) {} + : imm_(immediate), + rm_(NoDReg) {} NeonOperand(uint32_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoDReg) {} + : imm_(immediate), + rm_(NoDReg) {} NeonOperand(int64_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoDReg) {} + : imm_(immediate), + rm_(NoDReg) {} NeonOperand(uint64_t immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoDReg) {} + : imm_(immediate), + rm_(NoDReg) {} NeonOperand(float immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoDReg) {} + : imm_(immediate), + rm_(NoDReg) {} NeonOperand(double immediate) // NOLINT(runtime/explicit) - : imm_(immediate), rm_(NoDReg) {} + : imm_(immediate), + rm_(NoDReg) {} NeonOperand(const NeonImmediate& imm) // NOLINT(runtime/explicit) - : imm_(imm), 
rm_(NoDReg) {} + : imm_(imm), + rm_(NoDReg) {} NeonOperand(const VRegister& rm) // NOLINT(runtime/explicit) - : imm_(0), rm_(rm) { + : imm_(0), + rm_(rm) { VIXL_ASSERT(rm_.IsValid()); } diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc index e98de89b..9e73ffaa 100644 --- a/src/aarch64/assembler-aarch64.cc +++ b/src/aarch64/assembler-aarch64.cc @@ -1044,7 +1044,7 @@ void Assembler::cls(const Register& rd, const Register& rn) { V(auti, AUTI) \ V(autd, AUTD) -#define VIXL_DEFINE_ASM_FUNC(PRE, OP) \ +#define DEFINE_ASM_FUNCS(PRE, OP) \ void Assembler::PRE##a(const Register& xd, const Register& xn) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \ @@ -1069,8 +1069,8 @@ void Assembler::cls(const Register& rd, const Register& rn) { Emit(SF(xd) | OP##ZB | Rd(xd)); \ } -PAUTH_VARIATIONS(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +PAUTH_VARIATIONS(DEFINE_ASM_FUNCS) +#undef DEFINE_ASM_FUNCS void Assembler::pacga(const Register& xd, const Register& xn, @@ -1141,13 +1141,7 @@ void Assembler::LoadStorePair(const CPURegister& rt, addrmodeop = LoadStorePairPostIndexFixed; } } - - Instr emitop = addrmodeop | memop; - - // Only X registers may be specified for ldpsw. - VIXL_ASSERT(((emitop & LoadStorePairMask) != LDPSW_x) || rt.IsX()); - - Emit(emitop); + Emit(addrmodeop | memop); } @@ -1387,14 +1381,8 @@ void Assembler::ldr(const CPURegister& rt, int64_t imm19) { } -void Assembler::prfm(int op, int64_t imm19) { - Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19)); -} - void Assembler::prfm(PrefetchOperation op, int64_t imm19) { - // Passing unnamed values in 'op' is undefined behaviour in C++. 
- VIXL_ASSERT(IsNamedPrefetchOperation(op)); - prfm(static_cast<int>(op), imm19); + Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19)); } @@ -1647,18 +1635,17 @@ void Assembler::ldlar(const Register& rt, const MemOperand& src) { V(casal, CASAL) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ +#define DEFINE_ASM_FUNC(FN, OP) \ void Assembler::FN(const Register& rs, \ const Register& rt, \ const MemOperand& src) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \ - VIXL_ASSERT(AreSameFormat(rs, rt)); \ LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \ } -COMPARE_AND_SWAP_W_X_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off #define COMPARE_AND_SWAP_W_LIST(V) \ @@ -1672,7 +1659,7 @@ COMPARE_AND_SWAP_W_X_LIST(VIXL_DEFINE_ASM_FUNC) V(casalh, CASALH) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ +#define DEFINE_ASM_FUNC(FN, OP) \ void Assembler::FN(const Register& rs, \ const Register& rt, \ const MemOperand& src) { \ @@ -1680,8 +1667,8 @@ COMPARE_AND_SWAP_W_X_LIST(VIXL_DEFINE_ASM_FUNC) VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \ Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \ } -COMPARE_AND_SWAP_W_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off @@ -1692,7 +1679,7 @@ COMPARE_AND_SWAP_W_LIST(VIXL_DEFINE_ASM_FUNC) V(caspal, CASPAL) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ +#define DEFINE_ASM_FUNC(FN, OP) \ void Assembler::FN(const Register& rs, \ const Register& rs1, \ const Register& rt, \ @@ -1704,12 +1691,11 @@ COMPARE_AND_SWAP_W_LIST(VIXL_DEFINE_ASM_FUNC) VIXL_ASSERT(AreEven(rs, rt)); \ VIXL_ASSERT(AreConsecutive(rs, rs1)); \ 
VIXL_ASSERT(AreConsecutive(rt, rt1)); \ - VIXL_ASSERT(AreSameFormat(rs, rs1, rt, rt1)); \ LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \ Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \ } -COMPARE_AND_SWAP_PAIR_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // These macros generate all the variations of the atomic memory operations, // e.g. ldadd, ldadda, ldaddb, staddl, etc. @@ -1860,7 +1846,7 @@ void Assembler::ldapursw(const Register& rt, const MemOperand& src) { Emit(LDAPURSW | Rt(rt) | base | ImmLS(static_cast<int>(offset))); } -void Assembler::prfm(int op, +void Assembler::prfm(PrefetchOperation op, const MemOperand& address, LoadStoreScalingOption option) { VIXL_ASSERT(option != RequireUnscaledOffset); @@ -1868,16 +1854,8 @@ void Assembler::prfm(int op, Prefetch(op, address, option); } -void Assembler::prfm(PrefetchOperation op, - const MemOperand& address, - LoadStoreScalingOption option) { - // Passing unnamed values in 'op' is undefined behaviour in C++. - VIXL_ASSERT(IsNamedPrefetchOperation(op)); - prfm(static_cast<int>(op), address, option); -} - -void Assembler::prfum(int op, +void Assembler::prfum(PrefetchOperation op, const MemOperand& address, LoadStoreScalingOption option) { VIXL_ASSERT(option != RequireScaledOffset); @@ -1885,23 +1863,9 @@ void Assembler::prfum(int op, Prefetch(op, address, option); } -void Assembler::prfum(PrefetchOperation op, - const MemOperand& address, - LoadStoreScalingOption option) { - // Passing unnamed values in 'op' is undefined behaviour in C++. - VIXL_ASSERT(IsNamedPrefetchOperation(op)); - prfum(static_cast<int>(op), address, option); -} - - -void Assembler::prfm(int op, RawLiteral* literal) { - prfm(op, static_cast<int>(LinkAndGetWordOffsetTo(literal))); -} void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) { - // Passing unnamed values in 'op' is undefined behaviour in C++. 
- VIXL_ASSERT(IsNamedPrefetchOperation(op)); - prfm(static_cast<int>(op), literal); + prfm(op, static_cast<int>(LinkAndGetWordOffsetTo(literal))); } @@ -1969,7 +1933,6 @@ void Assembler::LoadStoreStructVerify(const VRegister& vt, // Assert that addressing mode is either offset (with immediate 0), post // index by immediate of the size of the register list, or post index by a // value in a core register. - VIXL_ASSERT(vt.HasSize() && vt.HasLaneSize()); if (addr.IsImmediateOffset()) { VIXL_ASSERT(addr.GetOffset() == 0); } else { @@ -2327,7 +2290,6 @@ void Assembler::LoadStoreStructSingle(const VRegister& vt, // We support vt arguments of the form vt.VxT() or vt.T(), where x is the // number of lanes, and T is b, h, s or d. unsigned lane_size = vt.GetLaneSizeInBytes(); - VIXL_ASSERT(lane_size > 0); VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size)); // Lane size is encoded in the opcode field. Lane index is encoded in the Q, @@ -2462,7 +2424,7 @@ void Assembler::NEON3DifferentHN(const VRegister& vd, // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ +#define DEFINE_ASM_FUNC(FN, OP, AS) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm) { \ @@ -2470,8 +2432,8 @@ void Assembler::FN(const VRegister& vd, \ VIXL_ASSERT(AS); \ NEON3DifferentL(vd, vn, vm, OP); \ } -NEON_3DIFF_LONG_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off #define NEON_3DIFF_HN_LIST(V) \ @@ -2485,7 +2447,7 @@ NEON_3DIFF_LONG_LIST(VIXL_DEFINE_ASM_FUNC) V(rsubhn2, NEON_RSUBHN2, vd.IsQ()) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ +#define DEFINE_ASM_FUNC(FN, OP, AS) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm) { \ @@ -2493,8 +2455,8 @@ NEON_3DIFF_LONG_LIST(VIXL_DEFINE_ASM_FUNC) VIXL_ASSERT(AS); \ NEON3DifferentHN(vd, vn, vm, OP); \ } -NEON_3DIFF_HN_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC 
+NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC void Assembler::uaddw(const VRegister& vd, const VRegister& vn, @@ -3142,7 +3104,7 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd, V(fcvtau, NEON_FCVTAU, FCVTAU) \ V(fcvtas, NEON_FCVTAS, FCVTAS) -#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \ +#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \ void Assembler::FN(const Register& rd, const VRegister& vn) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \ if (vn.IsH()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \ @@ -3157,8 +3119,8 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd, NEONFPConvertToInt(vd, vn, VEC_OP); \ } \ } -NEON_FP2REGMISC_FCVT_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS) +#undef DEFINE_ASM_FUNCS void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) { @@ -3346,7 +3308,7 @@ void Assembler::NEON3SameFP16(const VRegister& vd, V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar, NEON_FRECPE_H_scalar) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \ +#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \ Instr op; \ @@ -3386,8 +3348,8 @@ void Assembler::NEON3SameFP16(const VRegister& vd, NEONFP2RegMisc(vd, vn, op); \ } \ } -NEON_FP2REGMISC_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off #define NEON_FP2REGMISC_V85_LIST(V) \ @@ -3397,7 +3359,7 @@ NEON_FP2REGMISC_LIST(VIXL_DEFINE_ASM_FUNC) V(frint64z, NEON_FRINT64Z, FRINT64Z) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \ +#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt)); \ Instr op; \ @@ -3411,8 +3373,8 @@ 
NEON_FP2REGMISC_LIST(VIXL_DEFINE_ASM_FUNC) } \ NEONFP2RegMisc(vd, vn, op); \ } -NEON_FP2REGMISC_V85_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_FP2REGMISC_V85_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC void Assembler::NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, @@ -3676,7 +3638,7 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) { V(uqrshl, NEON_UQRSHL, true) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ +#define DEFINE_ASM_FUNC(FN, OP, AS) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm) { \ @@ -3684,8 +3646,8 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) { VIXL_ASSERT(AS); \ NEON3Same(vd, vn, vm, OP); \ } -NEON_3SAME_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_3SAME_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off #define NEON_FP3SAME_OP_LIST(V) \ @@ -3718,7 +3680,7 @@ NEON_3SAME_LIST(VIXL_DEFINE_ASM_FUNC) // TODO: This macro is complicated because it classifies the instructions in the // macro list above, and treats each case differently. It could be somewhat // simpler if we were to split the macro, at the cost of some duplication. 
-#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \ +#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm) { \ @@ -3758,8 +3720,8 @@ NEON_3SAME_LIST(VIXL_DEFINE_ASM_FUNC) NEONFP3Same(vd, vn, vm, op); \ } \ } -NEON_FP3SAME_OP_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off @@ -3770,7 +3732,7 @@ NEON_FP3SAME_OP_LIST(VIXL_DEFINE_ASM_FUNC) V(fmlsl2, NEON_FMLSL2) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP) \ +#define DEFINE_ASM_FUNC(FN, VEC_OP) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm) { \ @@ -3782,8 +3744,8 @@ NEON_FP3SAME_OP_LIST(VIXL_DEFINE_ASM_FUNC) (vd.Is4S() && vn.Is4H() && vm.Is4H())); \ Emit(FPFormat(vd) | VEC_OP | Rm(vm) | Rn(vn) | Rd(vd)); \ } -NEON_FHM_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_FHM_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC void Assembler::addp(const VRegister& vd, const VRegister& vn) { @@ -4176,7 +4138,7 @@ void Assembler::udot(const VRegister& vd, V(sqrdmulh, NEON_SQRDMULH_byelement, true) \ // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ +#define DEFINE_ASM_FUNC(FN, OP, AS) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm, \ @@ -4185,8 +4147,8 @@ void Assembler::udot(const VRegister& vd, VIXL_ASSERT(AS); \ NEONByElement(vd, vn, vm, vm_index, OP); \ } -NEON_BYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off @@ -4195,7 +4157,7 @@ NEON_BYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) V(sqrdmlsh, NEON_SQRDMLSH_byelement) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ +#define DEFINE_ASM_FUNC(FN, OP) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm, \ @@ -4203,8 +4165,8 @@ 
NEON_BYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM)); \ NEONByElement(vd, vn, vm, vm_index, OP); \ } -NEON_BYELEMENT_RDM_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off @@ -4215,7 +4177,7 @@ NEON_BYELEMENT_RDM_LIST(VIXL_DEFINE_ASM_FUNC) V(fmulx, NEON_FMULX_byelement, NEON_FMULX_H_byelement) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \ +#define DEFINE_ASM_FUNC(FN, OP, OP_H) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm, \ @@ -4224,8 +4186,8 @@ NEON_BYELEMENT_RDM_LIST(VIXL_DEFINE_ASM_FUNC) if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ NEONFPByElement(vd, vn, vm, vm_index, OP, OP_H); \ } -NEON_FPBYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off @@ -4251,7 +4213,7 @@ NEON_FPBYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ +#define DEFINE_ASM_FUNC(FN, OP, AS) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm, \ @@ -4260,8 +4222,8 @@ NEON_FPBYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) VIXL_ASSERT(AS); \ NEONByElementL(vd, vn, vm, vm_index, OP); \ } -NEON_BYELEMENT_LONG_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off @@ -4273,7 +4235,7 @@ NEON_BYELEMENT_LONG_LIST(VIXL_DEFINE_ASM_FUNC) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ +#define DEFINE_ASM_FUNC(FN, OP) \ void Assembler::FN(const VRegister& vd, \ const VRegister& vn, \ const VRegister& vm, \ @@ -4290,8 +4252,8 @@ NEON_BYELEMENT_LONG_LIST(VIXL_DEFINE_ASM_FUNC) Emit(FPFormat(vd) | OP | Rd(vd) | Rn(vn) | Rm(vm) | \ ImmNEONHLM(vm_index, 3)); \ } -NEON_BYELEMENT_FHM_LIST(VIXL_DEFINE_ASM_FUNC) -#undef 
VIXL_DEFINE_ASM_FUNC +NEON_BYELEMENT_FHM_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC void Assembler::suqadd(const VRegister& vd, const VRegister& vn) { VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); @@ -4801,13 +4763,13 @@ void Assembler::NEONAcrossLanes(const VRegister& vd, V(uminv, NEON_UMINV) // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ +#define DEFINE_ASM_FUNC(FN, OP) \ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ NEONAcrossLanes(vd, vn, OP, 0); \ } -NEON_ACROSSLANES_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC // clang-format off @@ -4818,15 +4780,15 @@ NEON_ACROSSLANES_LIST(VIXL_DEFINE_ASM_FUNC) V(fminnmv, NEON_FMINNMV, NEON_FMINNMV_H) \ // clang-format on -#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \ +#define DEFINE_ASM_FUNC(FN, OP, OP_H) \ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \ if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ VIXL_ASSERT(vd.Is1S() || vd.Is1H()); \ NEONAcrossLanes(vd, vn, OP, OP_H); \ } -NEON_ACROSSLANES_FP_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC +NEON_ACROSSLANES_FP_LIST(DEFINE_ASM_FUNC) +#undef DEFINE_ASM_FUNC void Assembler::NEONPerm(const VRegister& vd, @@ -4908,9 +4870,9 @@ void Assembler::NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn, int shift, NEONShiftImmediateOp op) { - int lane_size_in_bits = vn.GetLaneSizeInBits(); - VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); - NEONShiftImmediate(vd, vn, op, (lane_size_in_bits + shift) << 16); + int laneSizeInBits = vn.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits)); + NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16); } @@ -4918,9 +4880,9 @@ void Assembler::NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn, int shift, NEONShiftImmediateOp op) { - 
int lane_size_in_bits = vn.GetLaneSizeInBits(); - VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits)); - NEONShiftImmediate(vd, vn, op, ((2 * lane_size_in_bits) - shift) << 16); + int laneSizeInBits = vn.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits)); + NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16); } @@ -4928,9 +4890,9 @@ void Assembler::NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift, NEONShiftImmediateOp op) { - int lane_size_in_bits = vn.GetLaneSizeInBits(); - VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); - int immh_immb = (lane_size_in_bits + shift) << 16; + int laneSizeInBits = vn.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits)); + int immh_immb = (laneSizeInBits + shift) << 16; VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) || (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) || @@ -4946,9 +4908,9 @@ void Assembler::NEONShiftImmediateN(const VRegister& vd, int shift, NEONShiftImmediateOp op) { Instr q, scalar; - int lane_size_in_bits = vd.GetLaneSizeInBits(); - VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits)); - int immh_immb = (2 * lane_size_in_bits - shift) << 16; + int laneSizeInBits = vd.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits)); + int immh_immb = (2 * laneSizeInBits - shift) << 16; if (vn.IsScalar()) { VIXL_ASSERT((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) || @@ -5309,7 +5271,6 @@ void Assembler::MoveWide(const Register& rd, } else { // Calculate a new immediate and shift combination to encode the immediate // argument. - VIXL_ASSERT(shift == -1); shift = 0; if ((imm & 0xffffffffffff0000) == 0) { // Nothing to do. 
@@ -5643,7 +5604,7 @@ void Assembler::DataProcExtendedRegister(const Register& rd, Instr Assembler::LoadStoreMemOperand(const MemOperand& addr, - unsigned access_size_in_bytes_log2, + unsigned access_size, LoadStoreScalingOption option) { Instr base = RnSP(addr.GetBaseRegister()); int64_t offset = addr.GetOffset(); @@ -5653,22 +5614,21 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr, (option == PreferUnscaledOffset) || (option == RequireUnscaledOffset); if (prefer_unscaled && IsImmLSUnscaled(offset)) { // Use the unscaled addressing mode. - return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset); + return base | LoadStoreUnscaledOffsetFixed | + ImmLS(static_cast<int>(offset)); } if ((option != RequireUnscaledOffset) && - IsImmLSScaled(offset, access_size_in_bytes_log2)) { - // We need `offset` to be positive for the shift to be well-defined. - // IsImmLSScaled should check this. - VIXL_ASSERT(offset >= 0); + IsImmLSScaled(offset, access_size)) { // Use the scaled addressing mode. return base | LoadStoreUnsignedOffsetFixed | - ImmLSUnsigned(offset >> access_size_in_bytes_log2); + ImmLSUnsigned(static_cast<int>(offset) >> access_size); } if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) { // Use the unscaled addressing mode. - return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset); + return base | LoadStoreUnscaledOffsetFixed | + ImmLS(static_cast<int>(offset)); } } @@ -5689,17 +5649,17 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr, // Shifts are encoded in one bit, indicating a left shift by the memory // access size. - VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size_in_bytes_log2)); + VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size)); return base | LoadStoreRegisterOffsetFixed | Rm(addr.GetRegisterOffset()) | ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 
1 : 0); } if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) { - return base | LoadStorePreIndexFixed | ImmLS(offset); + return base | LoadStorePreIndexFixed | ImmLS(static_cast<int>(offset)); } if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) { - return base | LoadStorePostIndexFixed | ImmLS(offset); + return base | LoadStorePostIndexFixed | ImmLS(static_cast<int>(offset)); } // If this point is reached, the MemOperand (addr) cannot be encoded. @@ -5734,7 +5694,7 @@ void Assembler::LoadStorePAC(const Register& xt, } -void Assembler::Prefetch(int op, +void Assembler::Prefetch(PrefetchOperation op, const MemOperand& addr, LoadStoreScalingOption option) { VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset()); @@ -5743,14 +5703,6 @@ void Assembler::Prefetch(int op, Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option)); } -void Assembler::Prefetch(PrefetchOperation op, - const MemOperand& addr, - LoadStoreScalingOption option) { - // Passing unnamed values in 'op' is undefined behaviour in C++. 
- VIXL_ASSERT(IsNamedPrefetchOperation(op)); - Prefetch(static_cast<int>(op), addr, option); -} - bool Assembler::IsImmAddSub(int64_t immediate) { return IsUint12(immediate) || @@ -5836,17 +5788,17 @@ bool Assembler::IsImmFP64(double imm) { } -bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2) { - VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2); - return IsMultiple(offset, 1 << access_size_in_bytes_log2) && - IsInt7(offset / (1 << access_size_in_bytes_log2)); +bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) { + VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2); + return IsMultiple(offset, 1 << access_size) && + IsInt7(offset / (1 << access_size)); } -bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2) { - VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2); - return IsMultiple(offset, 1 << access_size_in_bytes_log2) && - IsUint12(offset / (1 << access_size_in_bytes_log2)); +bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) { + VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2); + return IsMultiple(offset, 1 << access_size) && + IsUint12(offset / (1 << access_size)); } @@ -5880,8 +5832,7 @@ bool Assembler::IsImmLogical(uint64_t value, unsigned* n, unsigned* imm_s, unsigned* imm_r) { - VIXL_ASSERT((width == kBRegSize) || (width == kHRegSize) || - (width == kSRegSize) || (width == kDRegSize)); + VIXL_ASSERT((width == kWRegSize) || (width == kXRegSize)); bool negate = false; @@ -5922,18 +5873,16 @@ bool Assembler::IsImmLogical(uint64_t value, value = ~value; } - if (width <= kWRegSize) { - // To handle 8/16/32-bit logical immediates, the very easiest thing is to repeat - // the input value to fill a 64-bit word. The correct encoding of that as a - // logical immediate will also be the correct encoding of the value. 
+ if (width == kWRegSize) { + // To handle 32-bit logical immediates, the very easiest thing is to repeat + // the input value twice to make a 64-bit word. The correct encoding of that + // as a logical immediate will also be the correct encoding of the 32-bit + // value. - // Avoid making the assumption that the most-significant 56/48/32 bits are zero by + // Avoid making the assumption that the most-significant 32 bits are zero by // shifting the value left and duplicating it. - for (unsigned bits = width; bits <= kWRegSize; bits *= 2) { - value <<= bits; - uint64_t mask = (UINT64_C(1) << bits) - 1; - value |= ((value >> bits) & mask); - } + value <<= kWRegSize; + value |= value >> kWRegSize; } // The basic analysis idea: imagine our input word looks like this. @@ -6237,5 +6186,152 @@ bool Assembler::CPUHas(SystemRegister sysreg) const { } +bool AreAliased(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4, + const CPURegister& reg5, + const CPURegister& reg6, + const CPURegister& reg7, + const CPURegister& reg8) { + int number_of_valid_regs = 0; + int number_of_valid_fpregs = 0; + + RegList unique_regs = 0; + RegList unique_fpregs = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8}; + + for (size_t i = 0; i < ArrayLength(regs); i++) { + if (regs[i].IsRegister()) { + number_of_valid_regs++; + unique_regs |= regs[i].GetBit(); + } else if (regs[i].IsVRegister()) { + number_of_valid_fpregs++; + unique_fpregs |= regs[i].GetBit(); + } else { + VIXL_ASSERT(!regs[i].IsValid()); + } + } + + int number_of_unique_regs = CountSetBits(unique_regs); + int number_of_unique_fpregs = CountSetBits(unique_fpregs); + + VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs); + VIXL_ASSERT(number_of_valid_fpregs >= number_of_unique_fpregs); + + return (number_of_valid_regs != number_of_unique_regs) || + (number_of_valid_fpregs != number_of_unique_fpregs); +} + + +bool AreSameSizeAndType(const 
CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4, + const CPURegister& reg5, + const CPURegister& reg6, + const CPURegister& reg7, + const CPURegister& reg8) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1); + match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1); + match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1); + match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1); + match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1); + match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1); + match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1); + return match; +} + +bool AreEven(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4, + const CPURegister& reg5, + const CPURegister& reg6, + const CPURegister& reg7, + const CPURegister& reg8) { + VIXL_ASSERT(reg1.IsValid()); + bool even = (reg1.GetCode() % 2) == 0; + even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0); + even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0); + even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0); + even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0); + even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0); + even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0); + even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0); + return even; +} + + +bool AreConsecutive(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + + if (!reg2.IsValid()) { + return true; + } else if (reg2.GetCode() != ((reg1.GetCode() + 1) % kNumberOfRegisters)) { + return false; + } + + if (!reg3.IsValid()) { + return true; + } else if (reg3.GetCode() != ((reg2.GetCode() + 1) % kNumberOfRegisters)) { + return false; + } + + if (!reg4.IsValid()) { + return true; + } else if (reg4.GetCode() != ((reg3.GetCode() + 1) % kNumberOfRegisters)) { + 
return false; + } + + return true; +} + + +bool AreSameFormat(const VRegister& reg1, + const VRegister& reg2, + const VRegister& reg3, + const VRegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= !reg2.IsValid() || reg2.IsSameFormat(reg1); + match &= !reg3.IsValid() || reg3.IsSameFormat(reg1); + match &= !reg4.IsValid() || reg4.IsSameFormat(reg1); + return match; +} + + +bool AreConsecutive(const VRegister& reg1, + const VRegister& reg2, + const VRegister& reg3, + const VRegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + + if (!reg2.IsValid()) { + return true; + } else if (reg2.GetCode() != ((reg1.GetCode() + 1) % kNumberOfVRegisters)) { + return false; + } + + if (!reg3.IsValid()) { + return true; + } else if (reg3.GetCode() != ((reg2.GetCode() + 1) % kNumberOfVRegisters)) { + return false; + } + + if (!reg4.IsValid()) { + return true; + } else if (reg4.GetCode() != ((reg3.GetCode() + 1) % kNumberOfVRegisters)) { + return false; + } + + return true; +} } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index f7aafd07..3ccda1a6 100644 --- a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -1089,6 +1089,18 @@ class Assembler : public vixl::internal::AssemblerBase { // zero [Armv8.3]. void pacdza(const Register& xd); + // Pointer Authentication Code for Data address, using key A, with address in + // x17 and modifier in x16 [Armv8.3]. + void pacda1716(); + + // Pointer Authentication Code for Data address, using key A, with address in + // LR and modifier in SP [Armv8.3]. + void pacdasp(); + + // Pointer Authentication Code for Data address, using key A, with address in + // LR and a modifier of zero [Armv8.3]. + void pacdaz(); + // Pointer Authentication Code for Data address, using key B [Armv8.3]. 
void pacdb(const Register& xd, const Register& xn); @@ -1096,6 +1108,18 @@ class Assembler : public vixl::internal::AssemblerBase { // zero [Armv8.3]. void pacdzb(const Register& xd); + // Pointer Authentication Code for Data address, using key B, with address in + // x17 and modifier in x16 [Armv8.3]. + void pacdb1716(); + + // Pointer Authentication Code for Data address, using key B, with address in + // LR and modifier in SP [Armv8.3]. + void pacdbsp(); + + // Pointer Authentication Code for Data address, using key B, with address in + // LR and a modifier of zero [Armv8.3]. + void pacdbz(); + // Pointer Authentication Code, using Generic key [Armv8.3]. void pacga(const Register& xd, const Register& xn, const Register& xm); @@ -1143,12 +1167,36 @@ class Assembler : public vixl::internal::AssemblerBase { // Authenticate Data address, using key A and a modifier of zero [Armv8.3]. void autdza(const Register& xd); + // Authenticate Data address, using key A, with address in x17 and modifier in + // x16 [Armv8.3]. + void autda1716(); + + // Authenticate Data address, using key A, with address in LR and modifier in + // SP [Armv8.3]. + void autdasp(); + + // Authenticate Data address, using key A, with address in LR and a modifier + // of zero [Armv8.3]. + void autdaz(); + // Authenticate Data address, using key B [Armv8.3]. void autdb(const Register& xd, const Register& xn); // Authenticate Data address, using key B and a modifier of zero [Armv8.3]. void autdzb(const Register& xd); + // Authenticate Data address, using key B, with address in x17 and modifier in + // x16 [Armv8.3]. + void autdb1716(); + + // Authenticate Data address, using key B, with address in LR and modifier in + // SP [Armv8.3]. + void autdbsp(); + + // Authenticate Data address, using key B, with address in LR and a modifier + // of zero [Armv8.3]. + void autdbz(); + // Strip Pointer Authentication Code of Data address [Armv8.3]. 
void xpacd(const Register& xd); @@ -2064,22 +2112,6 @@ class Assembler : public vixl::internal::AssemblerBase { // Prefetch from pc + imm19 << 2. void prfm(PrefetchOperation op, int64_t imm19); - // Prefetch memory (allowing unallocated hints). - void prfm(int op, - const MemOperand& addr, - LoadStoreScalingOption option = PreferScaledOffset); - - // Prefetch memory (with unscaled offset, allowing unallocated hints). - void prfum(int op, - const MemOperand& addr, - LoadStoreScalingOption option = PreferUnscaledOffset); - - // Prefetch memory in the literal pool (allowing unallocated hints). - void prfm(int op, RawLiteral* literal); - - // Prefetch from pc + imm19 << 2 (allowing unallocated hints). - void prfm(int op, int64_t imm19); - // Move instructions. The default shift of -1 indicates that the move // instruction will calculate an appropriate 16-bit immediate and left shift // that is equal to the 64-bit immediate argument. If an explicit left shift @@ -3586,2240 +3618,6 @@ class Assembler : public vixl::internal::AssemblerBase { const VRegister& vm, int rot); - // Scalable Vector Extensions. - - // Absolute value (predicated). - void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Add vectors (predicated). - void add(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Add vectors (unpredicated). - void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Add immediate (unpredicated). - void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); - - // Add multiple of predicate register size to scalar register. - void addpl(const Register& xd, const Register& xn, int imm6); - - // Add multiple of vector register size to scalar register. - void addvl(const Register& xd, const Register& xn, int imm6); - - // Compute vector address. - void adr(const ZRegister& zd, const SVEMemOperand& addr); - - // Bitwise AND predicates. 
- void and_(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise AND vectors (predicated). - void and_(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Bitwise AND with immediate (unpredicated). - void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm); - - // Bitwise AND vectors (unpredicated). - void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Bitwise AND predicates. - void ands(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise AND reduction to scalar. - void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Arithmetic shift right by immediate (predicated). - void asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift); - - // Arithmetic shift right by 64-bit wide elements (predicated). - void asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Arithmetic shift right by immediate (unpredicated). - void asr(const ZRegister& zd, const ZRegister& zn, int shift); - - // Arithmetic shift right by 64-bit wide elements (unpredicated). - void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Arithmetic shift right for divide by immediate (predicated). - void asrd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift); - - // Reversed arithmetic shift right by vector (predicated). - void asrr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Bitwise clear predicates. - void bic(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise clear vectors (predicated). 
- void bic(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Bitwise clear bits using immediate (unpredicated). - void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm); - - // Bitwise clear vectors (unpredicated). - void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Bitwise clear predicates. - void bics(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Break after first true condition. - void brka(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn); - - // Break after first true condition. - void brkas(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn); - - // Break before first true condition. - void brkb(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn); - - // Break before first true condition. - void brkbs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn); - - // Propagate break to next partition. - void brkn(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Propagate break to next partition. - void brkns(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Break after first true condition, propagating from previous partition. - void brkpa(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Break after first true condition, propagating from previous partition. - void brkpas(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Break before first true condition, propagating from previous partition. 
- void brkpb(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Break before first true condition, propagating from previous partition. - void brkpbs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Conditionally extract element after last to general-purpose register. - void clasta(const Register& rd, - const PRegister& pg, - const Register& rn, - const ZRegister& zm); - - // Conditionally extract element after last to SIMD&FP scalar register. - void clasta(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm); - - // Conditionally extract element after last to vector register. - void clasta(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Conditionally extract last element to general-purpose register. - void clastb(const Register& rd, - const PRegister& pg, - const Register& rn, - const ZRegister& zm); - - // Conditionally extract last element to SIMD&FP scalar register. - void clastb(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm); - - // Conditionally extract last element to vector register. - void clastb(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Count leading sign bits (predicated). - void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Count leading zero bits (predicated). - void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - void cmp(Condition cond, - const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to 64-bit wide elements. - void cmpeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. 
- void cmpeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Compare vector to 64-bit wide elements. - void cmpge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmpge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Compare vector to 64-bit wide elements. - void cmpgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmpgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Compare vector to 64-bit wide elements. - void cmphi(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmphi(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7); - - // Compare vector to 64-bit wide elements. - void cmphs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmphs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7); - - // Compare vector to 64-bit wide elements. - void cmple(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmple(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Compare vector to 64-bit wide elements. - void cmplo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. 
- void cmplo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7); - - // Compare vector to 64-bit wide elements. - void cmpls(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmpls(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7); - - // Compare vector to 64-bit wide elements. - void cmplt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmplt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Compare vector to 64-bit wide elements. - void cmpne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Compare vector to immediate. - void cmpne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Logically invert boolean condition in vector (predicated). - void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Count non-zero bits (predicated). - void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Set scalar to multiple of predicate constraint element count. - void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); - - // Set scalar to multiple of predicate constraint element count. - void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); - - // Set scalar to multiple of predicate constraint element count. - void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); - - // Set scalar to active predicate element count. - void cntp(const Register& xd, - const PRegister& pg, - const PRegisterWithLaneSize& pn); - - // Set scalar to multiple of predicate constraint element count. 
- void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); - - // Shuffle active elements of vector to the right and fill with zero. - void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); - - // Copy signed integer immediate to vector elements (predicated). - void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); - - // Copy general-purpose register to vector elements (predicated). - void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn); - - // Copy SIMD&FP scalar register to vector elements (predicated). - void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); - - // Compare and terminate loop. - void ctermeq(const Register& rn, const Register& rm); - - // Compare and terminate loop. - void ctermne(const Register& rn, const Register& rm); - - // Decrement scalar by multiple of predicate constraint element count. - void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Decrement scalar by multiple of predicate constraint element count. - void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Decrement vector by multiple of predicate constraint element count. - void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Decrement scalar by multiple of predicate constraint element count. - void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Decrement vector by multiple of predicate constraint element count. - void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Decrement scalar by active predicate element count. - void decp(const Register& rdn, const PRegisterWithLaneSize& pg); - - // Decrement vector by active predicate element count. - void decp(const ZRegister& zdn, const PRegister& pg); - - // Decrement scalar by multiple of predicate constraint element count. 
- void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Decrement vector by multiple of predicate constraint element count. - void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Broadcast general-purpose register to vector elements (unpredicated). - void dup(const ZRegister& zd, const Register& xn); - - // Broadcast indexed element to vector (unpredicated). - void dup(const ZRegister& zd, const ZRegister& zn, unsigned index); - - // As for movz/movk/movn, if the default shift of -1 is specified to dup, the - // assembler will pick an appropriate immediate and left shift that is - // equivalent to the immediate argument. If an explicit left shift is - // specified (0 or 8), the immediate must be a signed 8-bit integer. - - // Broadcast signed immediate to vector elements (unpredicated). - void dup(const ZRegister& zd, int imm8, int shift = -1); - - // Broadcast logical bitmask immediate to vector (unpredicated). - void dupm(const ZRegister& zd, uint64_t imm); - - // Bitwise exclusive OR with inverted immediate (unpredicated). - void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm); - - // Bitwise exclusive OR predicates. - void eor(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise exclusive OR vectors (predicated). - void eor(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Bitwise exclusive OR with immediate (unpredicated). - void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm); - - // Bitwise exclusive OR vectors (unpredicated). - void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Bitwise exclusive OR predicates. - void eors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise XOR reduction to scalar. 
- void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Extract vector from pair of vectors. - void ext(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned offset); - - // Floating-point absolute difference (predicated). - void fabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point absolute value (predicated). - void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point absolute compare vectors. - void facge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point absolute compare vectors. - void facgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point add immediate (predicated). - void fadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point add vector (predicated). - void fadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point add vector (unpredicated). - void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Floating-point add strictly-ordered reduction, accumulating in scalar. - void fadda(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm); - - // Floating-point add recursive reduction to scalar. - void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Floating-point complex add with rotate (predicated). - void fcadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - int rot); - - // Floating-point compare vector with zero. - void fcmeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero); - - // Floating-point compare vectors. 
- void fcmeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point compare vector with zero. - void fcmge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero); - - // Floating-point compare vectors. - void fcmge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point compare vector with zero. - void fcmgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero); - - // Floating-point compare vectors. - void fcmgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point complex multiply-add with rotate (predicated). - void fcmla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - int rot); - - // Floating-point complex multiply-add by indexed values with rotate. - void fcmla(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index, - int rot); - - // Floating-point compare vector with zero. - void fcmle(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero); - - // Floating-point compare vector with zero. - void fcmlt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero); - - // Floating-point compare vector with zero. - void fcmne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero); - - // Floating-point compare vectors. - void fcmne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point compare vectors. - void fcmuo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Copy floating-point immediate to vector elements (predicated). 
- void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm); - - // Copy half-precision floating-point immediate to vector elements - // (predicated). - void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) { - fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN)); - } - - // Floating-point convert precision (predicated). - void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point convert to signed integer, rounding toward zero - // (predicated). - void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point convert to unsigned integer, rounding toward zero - // (predicated). - void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point divide by vector (predicated). - void fdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point reversed divide by vector (predicated). - void fdivr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Broadcast floating-point immediate to vector elements. - void fdup(const ZRegister& zd, double imm); - - // Broadcast half-precision floating-point immediate to vector elements. - void fdup(const ZRegister& zd, Float16 imm) { - fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); - } - - // Floating-point exponential accelerator. - void fexpa(const ZRegister& zd, const ZRegister& zn); - - // Floating-point fused multiply-add vectors (predicated), writing - // multiplicand [Zdn = Za + Zdn * Zm]. - void fmad(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za); - - // Floating-point maximum with immediate (predicated). - void fmax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point maximum (predicated). 
- void fmax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point maximum number with immediate (predicated). - void fmaxnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point maximum number (predicated). - void fmaxnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point maximum number recursive reduction to scalar. - void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Floating-point maximum recursive reduction to scalar. - void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Floating-point minimum with immediate (predicated). - void fmin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point minimum (predicated). - void fmin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point minimum number with immediate (predicated). - void fminnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point minimum number (predicated). - void fminnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point minimum number recursive reduction to scalar. - void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Floating-point minimum recursive reduction to scalar. - void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Floating-point fused multiply-add vectors (predicated), writing addend - // [Zda = Zda + Zn * Zm]. - void fmla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point fused multiply-add by indexed elements - // (Zda = Zda + Zn * Zm[indexed]). 
- void fmla(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index); - - // Floating-point fused multiply-subtract vectors (predicated), writing - // addend [Zda = Zda + -Zn * Zm]. - void fmls(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point fused multiply-subtract by indexed elements - // (Zda = Zda + -Zn * Zm[indexed]). - void fmls(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index); - - // Move 8-bit floating-point immediate to vector elements (unpredicated). - void fmov(const ZRegister& zd, double imm); - - // Move 8-bit floating-point immediate to vector elements (predicated). - void fmov(const ZRegister& zd, const PRegisterM& pg, double imm); - - // Floating-point fused multiply-subtract vectors (predicated), writing - // multiplicand [Zdn = Za + -Zdn * Zm]. - void fmsb(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za); - - // Floating-point multiply by immediate (predicated). - void fmul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point multiply vectors (predicated). - void fmul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point multiply by indexed elements. - void fmul(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned index); - - // Floating-point multiply vectors (unpredicated). - void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Floating-point multiply-extended vectors (predicated). - void fmulx(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point negate (predicated). - void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point negated fused multiply-add vectors (predicated), writing - // multiplicand [Zdn = -Za + -Zdn * Zm]. 
- void fnmad(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za); - - // Floating-point negated fused multiply-add vectors (predicated), writing - // addend [Zda = -Zda + -Zn * Zm]. - void fnmla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point negated fused multiply-subtract vectors (predicated), - // writing addend [Zda = -Zda + Zn * Zm]. - void fnmls(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point negated fused multiply-subtract vectors (predicated), - // writing multiplicand [Zdn = -Za + Zdn * Zm]. - void fnmsb(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za); - - // Floating-point reciprocal estimate (unpredicated). - void frecpe(const ZRegister& zd, const ZRegister& zn); - - // Floating-point reciprocal step (unpredicated). - void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Floating-point reciprocal exponent (predicated). - void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point round to integral value (predicated). - void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point round to integral value (predicated). - void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point round to integral value (predicated). - void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point round to integral value (predicated). - void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point round to integral value (predicated). - void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point round to integral value (predicated). 
- void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point round to integral value (predicated). - void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point reciprocal square root estimate (unpredicated). - void frsqrte(const ZRegister& zd, const ZRegister& zn); - - // Floating-point reciprocal square root step (unpredicated). - void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Floating-point adjust exponent by vector (predicated). - void fscale(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point square root (predicated). - void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Floating-point subtract immediate (predicated). - void fsub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point subtract vectors (predicated). - void fsub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point subtract vectors (unpredicated). - void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Floating-point reversed subtract from immediate (predicated). - void fsubr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm); - - // Floating-point reversed subtract vectors (predicated). - void fsubr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point trigonometric multiply-add coefficient. - void ftmad(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - int imm3); - - // Floating-point trigonometric starting value. - void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Floating-point trigonometric select coefficient. 
- void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Increment scalar by multiple of predicate constraint element count. - void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Increment scalar by multiple of predicate constraint element count. - void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Increment vector by multiple of predicate constraint element count. - void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Increment scalar by multiple of predicate constraint element count. - void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Increment vector by multiple of predicate constraint element count. - void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Increment scalar by active predicate element count. - void incp(const Register& rdn, const PRegisterWithLaneSize& pg); - - // Increment vector by active predicate element count. - void incp(const ZRegister& zdn, const PRegister& pg); - - // Increment scalar by multiple of predicate constraint element count. - void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); - - // Increment vector by multiple of predicate constraint element count. - void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Create index starting from and incremented by immediate. - void index(const ZRegister& zd, int start, int step); - - // Create index starting from and incremented by general-purpose register. - void index(const ZRegister& zd, const Register& rn, const Register& rm); - - // Create index starting from general-purpose register and incremented by - // immediate. - void index(const ZRegister& zd, const Register& rn, int imm5); - - // Create index starting from immediate and incremented by general-purpose - // register. 
- void index(const ZRegister& zd, int imm5, const Register& rm); - - // Insert general-purpose register in shifted vector. - void insr(const ZRegister& zdn, const Register& rm); - - // Insert SIMD&FP scalar register in shifted vector. - void insr(const ZRegister& zdn, const VRegister& vm); - - // Extract element after last to general-purpose register. - void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn); - - // Extract element after last to SIMD&FP scalar register. - void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Extract last element to general-purpose register. - void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn); - - // Extract last element to SIMD&FP scalar register. - void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Contiguous/gather load bytes to vector. - void ld1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous/gather load halfwords to vector. - void ld1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous/gather load words to vector. - void ld1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous/gather load doublewords to vector. - void ld1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // TODO: Merge other loads into the SVEMemOperand versions. - - // Load and broadcast unsigned byte to vector. - void ld1rb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Load and broadcast unsigned halfword to vector. - void ld1rh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Load and broadcast unsigned word to vector. - void ld1rw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Load and broadcast doubleword to vector. 
- void ld1rd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load and replicate sixteen bytes. - void ld1rqb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load and replicate eight halfwords. - void ld1rqh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load and replicate four words. - void ld1rqw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load and replicate two doublewords. - void ld1rqd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Load and broadcast signed byte to vector. - void ld1rsb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Load and broadcast signed halfword to vector. - void ld1rsh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Load and broadcast signed word to vector. - void ld1rsw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous/gather load signed bytes to vector. - void ld1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous/gather load signed halfwords to vector. - void ld1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous/gather load signed words to vector. - void ld1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // TODO: Merge other loads into the SVEMemOperand versions. - - // Contiguous load two-byte structures to two vectors. - void ld2b(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load two-halfword structures to two vectors. - void ld2h(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load two-word structures to two vectors. 
- void ld2w(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load two-doubleword structures to two vectors. - void ld2d(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load three-byte structures to three vectors. - void ld3b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load three-halfword structures to three vectors. - void ld3h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load three-word structures to three vectors. - void ld3w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load three-doubleword structures to three vectors. - void ld3d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load four-byte structures to four vectors. - void ld4b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load four-halfword structures to four vectors. - void ld4h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load four-word structures to four vectors. - void ld4w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load four-doubleword structures to four vectors. 
- void ld4d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load first-fault unsigned bytes to vector. - void ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load first-fault unsigned halfwords to vector. - void ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load first-fault unsigned words to vector. - void ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load first-fault doublewords to vector. - void ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load first-fault signed bytes to vector. - void ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load first-fault signed halfwords to vector. - void ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load first-fault signed words to vector. - void ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Gather load first-fault unsigned bytes to vector. - void ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm); - - // Gather load first-fault unsigned bytes to vector (immediate index). - void ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Gather load first-fault doublewords to vector (vector index). - void ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm); - - // Gather load first-fault doublewords to vector (immediate index). - void ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Gather load first-fault unsigned halfwords to vector (vector index). 
- void ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm); - - // Gather load first-fault unsigned halfwords to vector (immediate index). - void ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Gather load first-fault signed bytes to vector (vector index). - void ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm); - - // Gather load first-fault signed bytes to vector (immediate index). - void ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Gather load first-fault signed halfwords to vector (vector index). - void ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm); - - // Gather load first-fault signed halfwords to vector (immediate index). - void ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Gather load first-fault signed words to vector (vector index). - void ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm); - - // Gather load first-fault signed words to vector (immediate index). - void ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Gather load first-fault unsigned words to vector (vector index). - void ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm); - - // Gather load first-fault unsigned words to vector (immediate index). - void ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5); - - // Contiguous load non-fault unsigned bytes to vector (immediate index). - void ldnf1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-fault doublewords to vector (immediate index). 
- void ldnf1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-fault unsigned halfwords to vector (immediate - // index). - void ldnf1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-fault signed bytes to vector (immediate index). - void ldnf1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-fault signed halfwords to vector (immediate index). - void ldnf1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-fault signed words to vector (immediate index). - void ldnf1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-fault unsigned words to vector (immediate index). - void ldnf1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-temporal bytes to vector. - void ldnt1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-temporal halfwords to vector. - void ldnt1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-temporal words to vector. - void ldnt1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Contiguous load non-temporal doublewords to vector. - void ldnt1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Load SVE predicate/vector register. - void ldr(const CPURegister& rt, const SVEMemOperand& addr); - - // Logical shift left by immediate (predicated). - void lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift); - - // Logical shift left by 64-bit wide elements (predicated). - void lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Logical shift left by immediate (unpredicated). 
- void lsl(const ZRegister& zd, const ZRegister& zn, int shift); - - // Logical shift left by 64-bit wide elements (unpredicated). - void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Reversed logical shift left by vector (predicated). - void lslr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Logical shift right by immediate (predicated). - void lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift); - - // Logical shift right by 64-bit wide elements (predicated). - void lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Logical shift right by immediate (unpredicated). - void lsr(const ZRegister& zd, const ZRegister& zn, int shift); - - // Logical shift right by 64-bit wide elements (unpredicated). - void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Reversed logical shift right by vector (predicated). - void lsrr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Bitwise invert predicate. - void not_(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn); - - // Bitwise invert predicate, setting the condition flags. - void nots(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn); - - // Multiply-add vectors (predicated), writing multiplicand - // [Zdn = Za + Zdn * Zm]. - void mad(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za); - - // Multiply-add vectors (predicated), writing addend - // [Zda = Zda + Zn * Zm]. - void mla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Multiply-subtract vectors (predicated), writing addend - // [Zda = Zda - Zn * Zm]. 
- void mls(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Move predicates (unpredicated) - void mov(const PRegister& pd, const PRegister& pn); - - // Move predicates (merging) - void mov(const PRegisterWithLaneSize& pd, - const PRegisterM& pg, - const PRegisterWithLaneSize& pn); - - // Move predicates (zeroing) - void mov(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn); - - // Move general-purpose register to vector elements (unpredicated) - void mov(const ZRegister& zd, const Register& xn); - - // Move SIMD&FP scalar register to vector elements (unpredicated) - void mov(const ZRegister& zd, const VRegister& vn); - - // Move vector register (unpredicated) - void mov(const ZRegister& zd, const ZRegister& zn); - - // Move indexed element to vector elements (unpredicated) - void mov(const ZRegister& zd, const ZRegister& zn, unsigned index); - - // Move general-purpose register to vector elements (predicated) - void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn); - - // Move SIMD&FP scalar register to vector elements (predicated) - void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); - - // Move vector elements (predicated) - void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Move signed integer immediate to vector elements (predicated) - void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); - - // Move signed immediate to vector elements (unpredicated). - void mov(const ZRegister& zd, int imm8, int shift); - - // Move logical bitmask immediate to vector (unpredicated). 
- void mov(const ZRegister& zd, uint64_t imm); - - // Move predicate (unpredicated), setting the condition flags - void movs(const PRegister& pd, const PRegister& pn); - - // Move predicates (zeroing), setting the condition flags - void movs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn); - - // Move prefix (predicated). - void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); - - // Move prefix (unpredicated). - void movprfx(const ZRegister& zd, const ZRegister& zn); - - // Multiply-subtract vectors (predicated), writing multiplicand - // [Zdn = Za - Zdn * Zm]. - void msb(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za); - - // Multiply vectors (predicated). - void mul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Multiply by immediate (unpredicated). - void mul(const ZRegister& zd, const ZRegister& zn, int imm8); - - // Bitwise NAND predicates. - void nand(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise NAND predicates. - void nands(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Negate (predicated). - void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Bitwise NOR predicates. - void nor(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise NOR predicates. - void nors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise invert vector (predicated). - void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Bitwise OR inverted predicate. 
- void orn(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise OR inverted predicate. - void orns(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise OR with inverted immediate (unpredicated). - void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm); - - // Bitwise OR predicate. - void orr(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise OR vectors (predicated). - void orr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Bitwise OR with immediate (unpredicated). - void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm); - - // Bitwise OR vectors (unpredicated). - void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Bitwise OR predicate. - void orrs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Bitwise OR reduction to scalar. - void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Set all predicate elements to false. - void pfalse(const PRegisterWithLaneSize& pd); - - // Set the first active predicate element to true. - void pfirst(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn); - - // Find next active predicate. - void pnext(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn); - - // Prefetch bytes. - void prfb(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr); - - // Prefetch halfwords. - void prfh(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr); - - // Prefetch words. 
- void prfw(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr); - - // Prefetch doublewords. - void prfd(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr); - - // Set condition flags for predicate. - void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn); - - // Initialise predicate from named constraint. - void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); - - // Initialise predicate from named constraint. - void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); - - // Unpack and widen half of predicate. - void punpkhi(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn); - - // Unpack and widen half of predicate. - void punpklo(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn); - - // Reverse bits (predicated). - void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Read the first-fault register. - void rdffr(const PRegisterWithLaneSize& pd); - - // Return predicate of succesfully loaded elements. - void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); - - // Return predicate of succesfully loaded elements. - void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); - - // Read multiple of vector register size to scalar register. - void rdvl(const Register& xd, int imm6); - - // Reverse all elements in a predicate. - void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn); - - // Reverse all elements in a vector (unpredicated). - void rev(const ZRegister& zd, const ZRegister& zn); - - // Reverse bytes / halfwords / words within elements (predicated). - void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Reverse bytes / halfwords / words within elements (predicated). - void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Reverse bytes / halfwords / words within elements (predicated). 
- void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Signed absolute difference (predicated). - void sabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Signed add reduction to scalar. - void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); - - // Signed integer convert to floating-point (predicated). - void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Signed divide (predicated). - void sdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Signed reversed divide (predicated). - void sdivr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Signed dot product by indexed quadtuplet. - void sdot(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index); - - // Signed dot product. - void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); - - // Conditionally select elements from two predicates. - void sel(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Conditionally select elements from two vectors. - void sel(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Initialise the first-fault register to all true. - void setffr(); - - // Signed maximum vectors (predicated). - void smax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Signed maximum with immediate (unpredicated). - void smax(const ZRegister& zd, const ZRegister& zn, int imm8); - - // Signed maximum reduction to scalar. - void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Signed minimum vectors (predicated). 
- void smin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Signed minimum with immediate (unpredicated). - void smin(const ZRegister& zd, const ZRegister& zn, int imm8); - - // Signed minimum reduction to scalar. - void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Signed multiply returning high half (predicated). - void smulh(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Splice two vectors under predicate control. - void splice(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Signed saturating add vectors (unpredicated). - void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Signed saturating add immediate (unpredicated). - void sqadd(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift = -1); - - // Signed saturating decrement scalar by multiple of 8-bit predicate - // constraint element count. - void sqdecb(const Register& xd, - const Register& wn, - int pattern, - int multiplier); - - // Signed saturating decrement scalar by multiple of 8-bit predicate - // constraint element count. - void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating decrement scalar by multiple of 64-bit predicate - // constraint element count. - void sqdecd(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1); - - // Signed saturating decrement scalar by multiple of 64-bit predicate - // constraint element count. - void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating decrement vector by multiple of 64-bit predicate - // constraint element count. - void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating decrement scalar by multiple of 16-bit predicate - // constraint element count. 
- void sqdech(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1); - - // Signed saturating decrement scalar by multiple of 16-bit predicate - // constraint element count. - void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating decrement vector by multiple of 16-bit predicate - // constraint element count. - void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating decrement scalar by active predicate element count. - void sqdecp(const Register& xd, - const PRegisterWithLaneSize& pg, - const Register& wn); - - // Signed saturating decrement scalar by active predicate element count. - void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg); - - // Signed saturating decrement vector by active predicate element count. - void sqdecp(const ZRegister& zdn, const PRegister& pg); - - // Signed saturating decrement scalar by multiple of 32-bit predicate - // constraint element count. - void sqdecw(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1); - - // Signed saturating decrement scalar by multiple of 32-bit predicate - // constraint element count. - void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating decrement vector by multiple of 32-bit predicate - // constraint element count. - void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating increment scalar by multiple of 8-bit predicate - // constraint element count. - void sqincb(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1); - - // Signed saturating increment scalar by multiple of 8-bit predicate - // constraint element count. - void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating increment scalar by multiple of 64-bit predicate - // constraint element count. 
- void sqincd(const Register& xd, - const Register& wn, - int pattern, - int multiplier); - - // Signed saturating increment scalar by multiple of 64-bit predicate - // constraint element count. - void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating increment vector by multiple of 64-bit predicate - // constraint element count. - void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating increment scalar by multiple of 16-bit predicate - // constraint element count. - void sqinch(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1); - - // Signed saturating increment scalar by multiple of 16-bit predicate - // constraint element count. - void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating increment vector by multiple of 16-bit predicate - // constraint element count. - void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating increment scalar by active predicate element count. - void sqincp(const Register& xd, - const PRegisterWithLaneSize& pg, - const Register& wn); - - // Signed saturating increment scalar by active predicate element count. - void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg); - - // Signed saturating increment vector by active predicate element count. - void sqincp(const ZRegister& zdn, const PRegister& pg); - - // Signed saturating increment scalar by multiple of 32-bit predicate - // constraint element count. - void sqincw(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1); - - // Signed saturating increment scalar by multiple of 32-bit predicate - // constraint element count. - void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating increment vector by multiple of 32-bit predicate - // constraint element count. 
- void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Signed saturating subtract vectors (unpredicated). - void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Signed saturating subtract immediate (unpredicated). - void sqsub(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift = -1); - - // Contiguous/scatter store bytes from vector. - void st1b(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous/scatter store halfwords from vector. - void st1h(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous/scatter store words from vector. - void st1w(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous/scatter store doublewords from vector. - void st1d(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store two-byte structures from two vectors. - void st2b(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store two-halfword structures from two vectors. - void st2h(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store two-word structures from two vectors. - void st2w(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store two-doubleword structures from two vectors, - void st2d(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store three-byte structures from three vectors. - void st3b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store three-halfword structures from three vectors. 
- void st3h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store three-word structures from three vectors. - void st3w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store three-doubleword structures from three vectors. - void st3d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store four-byte structures from four vectors. - void st4b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store four-halfword structures from four vectors. - void st4h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store four-word structures from four vectors. - void st4w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store four-doubleword structures from four vectors. - void st4d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store non-temporal bytes from vector. - void stnt1b(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store non-temporal halfwords from vector. - void stnt1h(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store non-temporal words from vector. - void stnt1w(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Contiguous store non-temporal doublewords from vector. 
- void stnt1d(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Store SVE predicate/vector register. - void str(const CPURegister& rt, const SVEMemOperand& addr); - - // Subtract vectors (predicated). - void sub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Subtract vectors (unpredicated). - void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Subtract immediate (unpredicated). - void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); - - // Reversed subtract vectors (predicated). - void subr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Reversed subtract from immediate (unpredicated). - void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); - - // Signed unpack and extend half of vector. - void sunpkhi(const ZRegister& zd, const ZRegister& zn); - - // Signed unpack and extend half of vector. - void sunpklo(const ZRegister& zd, const ZRegister& zn); - - // Signed byte extend (predicated). - void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Signed halfword extend (predicated). - void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Signed word extend (predicated). - void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Programmable table lookup/permute using vector of indices into a - // vector. - void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Interleave even or odd elements from two predicates. - void trn1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Interleave even or odd elements from two vectors. - void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Interleave even or odd elements from two predicates. 
- void trn2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Interleave even or odd elements from two vectors. - void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Unsigned absolute difference (predicated). - void uabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Unsigned add reduction to scalar. - void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); - - // Unsigned integer convert to floating-point (predicated). - void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Unsigned divide (predicated). - void udiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Unsigned reversed divide (predicated). - void udivr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Unsigned dot product by indexed quadtuplet. - void udot(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index); - - // Unsigned dot product. - void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); - - // Unsigned maximum vectors (predicated). - void umax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Unsigned maximum with immediate (unpredicated). - void umax(const ZRegister& zd, const ZRegister& zn, int imm8); - - // Unsigned maximum reduction to scalar. - void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Unsigned minimum vectors (predicated). - void umin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Unsigned minimum with immediate (unpredicated). - void umin(const ZRegister& zd, const ZRegister& zn, int imm8); - - // Unsigned minimum reduction to scalar. 
- void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); - - // Unsigned multiply returning high half (predicated). - void umulh(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Unsigned saturating add vectors (unpredicated). - void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Unsigned saturating add immediate (unpredicated). - void uqadd(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift = -1); - - // Unsigned saturating decrement scalar by multiple of 8-bit predicate - // constraint element count. - void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating decrement scalar by multiple of 64-bit predicate - // constraint element count. - void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating decrement vector by multiple of 64-bit predicate - // constraint element count. - void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating decrement scalar by multiple of 16-bit predicate - // constraint element count. - void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating decrement vector by multiple of 16-bit predicate - // constraint element count. - void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating decrement scalar by active predicate element count. - void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg); - - // Unsigned saturating decrement vector by active predicate element count. - void uqdecp(const ZRegister& zdn, const PRegister& pg); - - // Unsigned saturating decrement scalar by multiple of 32-bit predicate - // constraint element count. 
- void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating decrement vector by multiple of 32-bit predicate - // constraint element count. - void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating increment scalar by multiple of 8-bit predicate - // constraint element count. - void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating increment scalar by multiple of 64-bit predicate - // constraint element count. - void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating increment vector by multiple of 64-bit predicate - // constraint element count. - void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating increment scalar by multiple of 16-bit predicate - // constraint element count. - void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating increment vector by multiple of 16-bit predicate - // constraint element count. - void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating increment scalar by active predicate element count. - void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg); - - // Unsigned saturating increment vector by active predicate element count. - void uqincp(const ZRegister& zdn, const PRegister& pg); - - // Unsigned saturating increment scalar by multiple of 32-bit predicate - // constraint element count. - void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating increment vector by multiple of 32-bit predicate - // constraint element count. - void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); - - // Unsigned saturating subtract vectors (unpredicated). 
- void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Unsigned saturating subtract immediate (unpredicated). - void uqsub(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift = -1); - - // Unsigned unpack and extend half of vector. - void uunpkhi(const ZRegister& zd, const ZRegister& zn); - - // Unsigned unpack and extend half of vector. - void uunpklo(const ZRegister& zd, const ZRegister& zn); - - // Unsigned byte extend (predicated). - void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Unsigned halfword extend (predicated). - void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Unsigned word extend (predicated). - void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); - - // Concatenate even or odd elements from two predicates. - void uzp1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Concatenate even or odd elements from two vectors. - void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Concatenate even or odd elements from two predicates. - void uzp2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Concatenate even or odd elements from two vectors. - void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // While incrementing signed scalar less than or equal to scalar. - void whilele(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm); - - // While incrementing unsigned scalar lower than scalar. - void whilelo(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm); - - // While incrementing unsigned scalar lower or same as scalar. - void whilels(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm); - - // While incrementing signed scalar less than scalar. 
- void whilelt(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm); - - // Write the first-fault register. - void wrffr(const PRegisterWithLaneSize& pn); - - // Interleave elements from two half predicates. - void zip1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Interleave elements from two half vectors. - void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - - // Interleave elements from two half predicates. - void zip2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm); - - // Interleave elements from two half vectors. - void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); - // Emit generic instructions. // Emit raw instructions into the instruction stream. @@ -5852,20 +3650,20 @@ class Assembler : public vixl::internal::AssemblerBase { // Code generation helpers. // Register encoding. - template <int hibit, int lobit> - static Instr Rx(CPURegister rx) { - VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode); - return ImmUnsignedField<hibit, lobit>(rx.GetCode()); + static Instr Rd(CPURegister rd) { + VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode); + return rd.GetCode() << Rd_offset; } -#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s) -#define REGISTER_ENCODER(N) \ - static Instr R##N(CPURegister r##N) { \ - return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \ + static Instr Rn(CPURegister rn) { + VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode); + return rn.GetCode() << Rn_offset; + } + + static Instr Rm(CPURegister rm) { + VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); + return rm.GetCode() << Rm_offset; } - CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER) -#undef REGISTER_ENCODER -#undef CPU_REGISTER_FIELD_NAMES static Instr RmNot31(CPURegister rm) { VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); @@ -5873,6 +3671,26 @@ class Assembler : public 
vixl::internal::AssemblerBase { return Rm(rm); } + static Instr Ra(CPURegister ra) { + VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode); + return ra.GetCode() << Ra_offset; + } + + static Instr Rt(CPURegister rt) { + VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode); + return rt.GetCode() << Rt_offset; + } + + static Instr Rt2(CPURegister rt2) { + VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode); + return rt2.GetCode() << Rt2_offset; + } + + static Instr Rs(CPURegister rs) { + VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode); + return rs.GetCode() << Rs_offset; + } + // These encoding functions allow the stack pointer to be encoded, and // disallow the zero register. static Instr RdSP(Register rd) { @@ -5890,33 +3708,6 @@ class Assembler : public vixl::internal::AssemblerBase { return (rm.GetCode() & kRegCodeMask) << Rm_offset; } - static Instr Pd(PRegister pd) { - return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd); - } - - static Instr Pm(PRegister pm) { - return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm); - } - - static Instr Pn(PRegister pn) { - return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn); - } - - static Instr PgLow8(PRegister pg) { - // Governing predicates can be merging, zeroing, or unqualified. They should - // never have a lane size. - VIXL_ASSERT(!pg.HasLaneSize()); - return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg); - } - - template <int hibit, int lobit> - static Instr Pg(PRegister pg) { - // Governing predicates can be merging, zeroing, or unqualified. They should - // never have a lane size. - VIXL_ASSERT(!pg.HasLaneSize()); - return Rx<hibit, lobit>(pg); - } - // Flags encoding. static Instr Flags(FlagsUpdate S) { if (S == SetFlags) { @@ -5930,26 +3721,6 @@ class Assembler : public vixl::internal::AssemblerBase { static Instr Cond(Condition cond) { return cond << Condition_offset; } - // Generic immediate encoding. 
- template <int hibit, int lobit> - static Instr ImmField(int64_t imm) { - VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); - VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); - int fieldsize = hibit - lobit + 1; - VIXL_ASSERT(IsIntN(fieldsize, imm)); - return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit); - } - - // For unsigned immediate encoding. - // TODO: Handle signed and unsigned immediate in satisfactory way. - template <int hibit, int lobit> - static Instr ImmUnsignedField(uint64_t imm) { - VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); - VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); - VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm)); - return static_cast<Instr>(imm << lobit); - } - // PC-relative address encoding. static Instr ImmPCRelAddress(int64_t imm21) { VIXL_ASSERT(IsInt21(imm21)); @@ -6000,60 +3771,11 @@ class Assembler : public vixl::internal::AssemblerBase { if (IsUint12(imm)) { // No shift required. imm <<= ImmAddSub_offset; } else { - imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset); + imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset); } return imm; } - static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) { - VIXL_ASSERT(IsUint6(imms)); - VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3)); - USE(lane_size); - return imms << SVEImmSetBits_offset; - } - - static Instr SVEImmRotate(unsigned immr, unsigned lane_size) { - VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr)); - USE(lane_size); - return immr << SVEImmRotate_offset; - } - - static Instr SVEBitN(unsigned bitn) { - VIXL_ASSERT(IsUint1(bitn)); - return bitn << SVEBitN_offset; - } - - static Instr SVEDtype(unsigned msize_in_bytes_log2, - unsigned esize_in_bytes_log2, - bool is_signed, - int dtype_h_lsb = 23, - int dtype_l_lsb = 21) { - VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2); - VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2); - Instr dtype_h = 
msize_in_bytes_log2; - Instr dtype_l = esize_in_bytes_log2; - // Signed forms use the encodings where msize would be greater than esize. - if (is_signed) { - dtype_h = dtype_h ^ 0x3; - dtype_l = dtype_l ^ 0x3; - } - VIXL_ASSERT(IsUint2(dtype_h)); - VIXL_ASSERT(IsUint2(dtype_l)); - VIXL_ASSERT((dtype_h > dtype_l) == is_signed); - - return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb); - } - - static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2, - unsigned esize_in_bytes_log2, - bool is_signed) { - return SVEDtype(msize_in_bytes_log2, - esize_in_bytes_log2, - is_signed, - 23, - 13); - } - static Instr ImmS(unsigned imms, unsigned reg_size) { VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) || ((reg_size == kWRegSize) && IsUint5(imms))); @@ -6134,9 +3856,9 @@ class Assembler : public vixl::internal::AssemblerBase { return TruncateToUint9(imm9) << ImmLS_offset; } - static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) { - VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2)); - int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2); + static Instr ImmLSPair(int64_t imm7, unsigned access_size) { + VIXL_ASSERT(IsMultiple(imm7, 1 << access_size)); + int64_t scaled_imm7 = imm7 / (1 << access_size); VIXL_ASSERT(IsInt7(scaled_imm7)); return TruncateToUint7(scaled_imm7) << ImmLSPair_offset; } @@ -6268,8 +3990,8 @@ class Assembler : public vixl::internal::AssemblerBase { unsigned* n = NULL, unsigned* imm_s = NULL, unsigned* imm_r = NULL); - static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2); - static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2); + static bool IsImmLSPair(int64_t offset, unsigned access_size); + static bool IsImmLSScaled(int64_t offset, unsigned access_size); static bool IsImmLSUnscaled(int64_t offset); static bool IsImmMovn(uint64_t imm, unsigned reg_size); static bool IsImmMovz(uint64_t imm, unsigned reg_size); @@ -6404,30 +4126,6 @@ class Assembler : public 
vixl::internal::AssemblerBase { } } - template <typename T> - static Instr SVESize(const T& rd) { - VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister()); - VIXL_ASSERT(rd.HasLaneSize()); - switch (rd.GetLaneSizeInBytes()) { - case 1: - return SVE_B; - case 2: - return SVE_H; - case 4: - return SVE_S; - case 8: - return SVE_D; - default: - return 0xffffffff; - } - } - - static Instr ImmSVEPredicateConstraint(int pattern) { - VIXL_ASSERT(IsUint5(pattern)); - return (pattern << ImmSVEPredicateConstraint_offset) & - ImmSVEPredicateConstraint_mask; - } - static Instr ImmNEONHLM(int index, int num_bits) { int h, l, m; if (num_bits == 3) { @@ -6579,93 +4277,9 @@ class Assembler : public vixl::internal::AssemblerBase { const MemOperand& addr, Instr op); - // Set `is_load` to false in default as it's only used in the - // scalar-plus-vector form. - Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2, - int num_regs, - const SVEMemOperand& addr, - bool is_load = false); - - // E.g. st1b, st1h, ... - // This supports both contiguous and scatter stores. - void SVESt1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // E.g. ld1b, ld1h, ... - // This supports both contiguous and gather loads. - void SVELd1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - bool is_signed); - - // E.g. ld1rb, ld1rh, ... - void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - bool is_signed); - - // E.g. ldff1b, ldff1h, ... - // This supports both contiguous and gather loads. - void SVELdff1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - bool is_signed); - - // Common code for the helpers above. 
- void SVELdSt1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr, - bool is_signed, - Instr op); - - // Common code for the helpers above. - void SVEScatterGatherHelper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr, - bool is_load, - bool is_signed, - bool is_first_fault); - - // E.g. st2b, st3h, ... - void SVESt234Helper(int num_regs, - const ZRegister& zt1, - const PRegister& pg, - const SVEMemOperand& addr); - - // E.g. ld2b, ld3h, ... - void SVELd234Helper(int num_regs, - const ZRegister& zt1, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - // Common code for the helpers above. - void SVELdSt234Helper(int num_regs, - const ZRegister& zt1, - const PRegister& pg, - const SVEMemOperand& addr, - Instr op); - - // E.g. ld1qb, ld1qh, ldnt1b, ... - void SVELd1St1ScaImmHelper(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr, - Instr regoffset_op, - Instr immoffset_op, - int imm_divisor = 1); - void Prefetch(PrefetchOperation op, const MemOperand& addr, LoadStoreScalingOption option = PreferScaledOffset); - void Prefetch(int op, - const MemOperand& addr, - LoadStoreScalingOption option = PreferScaledOffset); // TODO(all): The third parameter should be passed by reference but gcc 4.8.2 // reports a bogus uninitialised warning then. 
@@ -6673,9 +4287,6 @@ class Assembler : public vixl::internal::AssemblerBase { const Register& rn, const Operand operand, LogicalOp op); - - void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op); - void LogicalImmediate(const Register& rd, const Register& rn, unsigned n, @@ -6695,92 +4306,6 @@ class Assembler : public vixl::internal::AssemblerBase { FlagsUpdate S, AddSubWithCarryOp op); - void CompareVectors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm, - SVEIntCompareVectorsOp op); - - void CompareVectors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm, - SVEIntCompareSignedImmOp op); - - void CompareVectors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm, - SVEIntCompareUnsignedImmOp op); - - void SVEIntAddSubtractImmUnpredicatedHelper( - SVEIntAddSubtractImm_UnpredicatedOp op, - const ZRegister& zd, - int imm8, - int shift); - - void SVEElementCountToRegisterHelper(Instr op, - const Register& rd, - int pattern, - int multiplier); - - Instr EncodeSVEShiftImmediate(Shift shift_op, - int shift, - int lane_size_in_bits); - - void SVEBitwiseShiftImmediate(const ZRegister& zd, - const ZRegister& zn, - Instr encoded_imm, - SVEBitwiseShiftUnpredicatedOp op); - - void SVEBitwiseShiftImmediatePred(const ZRegister& zdn, - const PRegisterM& pg, - Instr encoded_imm, - SVEBitwiseShiftByImm_PredicatedOp op); - - Instr SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, - const ZRegister& zm, - int index, - Instr op_h, - Instr op_s, - Instr op_d); - - - void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size); - - void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size); - - void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation 
prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size); - - void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size); - - void SVEPrefetchHelper(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size); - - static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) { - // SVE only supports PLD and PST, not PLI. - VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) || - ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM))); - // Check that we can simply map bits. - VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000); - VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000); - // Remaining operations map directly. - return ((prfop & 0b10000) >> 1) | (prfop & 0b00111); - } // Functions for emulating operands not directly supported by the instruction // set. @@ -6982,16 +4507,12 @@ class Assembler : public vixl::internal::AssemblerBase { NEONShiftImmediateOp op); void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop); - // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8) - // and *shift is either 0 or 8. Otherwise, leave the values unchanged. - void ResolveSVEImm8Shift(int* imm8, int* shift); - Instr LoadStoreStructAddrModeField(const MemOperand& addr); // Encode the specified MemOperand for the specified access size and scaling // preference. Instr LoadStoreMemOperand(const MemOperand& addr, - unsigned access_size_in_bytes_log2, + unsigned access_size, LoadStoreScalingOption option); // Link the current (not-yet-emitted) instruction to the specified label, then diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc deleted file mode 100644 index f7cf8b21..00000000 --- a/src/aarch64/assembler-sve-aarch64.cc +++ /dev/null @@ -1,6489 +0,0 @@ -// Copyright 2019, VIXL authors -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of ARM Limited nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "assembler-aarch64.h" - -namespace vixl { -namespace aarch64 { - -void Assembler::ResolveSVEImm8Shift(int* imm8, int* shift) { - if (*shift < 0) { - VIXL_ASSERT(*shift == -1); - // Derive the shift amount from the immediate. - if (IsInt8(*imm8)) { - *shift = 0; - } else if ((*imm8 % 256) == 0) { - *imm8 /= 256; - *shift = 8; - } - } - - VIXL_ASSERT(IsInt8(*imm8)); - VIXL_ASSERT((*shift == 0) || (*shift == 8)); -} - -// SVEAddressGeneration. 
- -void Assembler::adr(const ZRegister& zd, const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsVectorPlusVector()); - VIXL_ASSERT( - AreSameLaneSize(zd, addr.GetVectorBase(), addr.GetVectorOffset())); - - int lane_size = zd.GetLaneSizeInBits(); - VIXL_ASSERT((lane_size == kSRegSize) || (lane_size == kDRegSize)); - - int shift_amount = addr.GetShiftAmount(); - VIXL_ASSERT((shift_amount >= 0) && (shift_amount <= 3)); - - Instr op = 0xffffffff; - Instr msz = shift_amount << 10; - SVEOffsetModifier mod = addr.GetOffsetModifier(); - switch (mod) { - case SVE_UXTW: - VIXL_ASSERT(lane_size == kDRegSize); - op = ADR_z_az_d_u32_scaled; - break; - case SVE_SXTW: - VIXL_ASSERT(lane_size == kDRegSize); - op = ADR_z_az_d_s32_scaled; - break; - case SVE_LSL: - case NO_SVE_OFFSET_MODIFIER: - op = (lane_size == kSRegSize) ? ADR_z_az_s_same_scaled - : ADR_z_az_d_same_scaled; - break; - default: - VIXL_UNIMPLEMENTED(); - } - Emit(op | msz | Rd(zd) | Rn(addr.GetVectorBase()) | - Rm(addr.GetVectorOffset())); -} - -void Assembler::SVELogicalImmediate(const ZRegister& zdn, - uint64_t imm, - Instr op) { - unsigned bit_n, imm_s, imm_r; - unsigned lane_size = zdn.GetLaneSizeInBits(); - // Check that the immediate can be encoded in the instruction. 
- if (IsImmLogical(imm, lane_size, &bit_n, &imm_s, &imm_r)) { - Emit(op | Rd(zdn) | SVEBitN(bit_n) | SVEImmRotate(imm_r, lane_size) | - SVEImmSetBits(imm_s, lane_size)); - } else { - VIXL_UNREACHABLE(); - } -} - -void Assembler::and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - SVELogicalImmediate(zd, imm, AND_z_zi); -} - -void Assembler::dupm(const ZRegister& zd, uint64_t imm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - // DUPM_z_i is an SVEBroadcastBitmaskImmOp, but its encoding and constraints - // are similar enough to SVEBitwiseLogicalWithImm_UnpredicatedOp, that we can - // use the logical immediate encoder to get the correct behaviour. - SVELogicalImmediate(zd, imm, DUPM_z_i); -} - -void Assembler::eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - SVELogicalImmediate(zd, imm, EOR_z_zi); -} - -void Assembler::orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - SVELogicalImmediate(zd, imm, ORR_z_zi); -} - -// SVEBitwiseLogicalUnpredicated. 
-void Assembler::and_(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.IsLaneSizeD()); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - Emit(AND_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::bic(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.IsLaneSizeD()); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - Emit(BIC_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::eor(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.IsLaneSizeD()); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - Emit(EOR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::orr(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.IsLaneSizeD()); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - Emit(ORR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); -} - -// SVEBitwiseShiftPredicated. - -void Assembler::SVEBitwiseShiftImmediatePred( - const ZRegister& zdn, - const PRegisterM& pg, - Instr encoded_imm_and_tsz, - SVEBitwiseShiftByImm_PredicatedOp op) { - Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) - << 5; - Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; - Emit(op | tszh | tszl_and_imm | PgLow8(pg) | Rd(zdn)); -} - -void Assembler::asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> - // 0000 0100 ..00 0000 100. .... .... .... 
- // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 0 | Pg<12:10> | - // tszl<9:8> | imm3<7:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - Instr encoded_imm = - EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); - SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASR_z_p_zi); -} - -void Assembler::asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D - // 0000 0100 ..01 1000 100. .... .... .... - // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | - // Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || - ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && - (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); - Instr op = ASR_z_p_zw; - if (AreSameLaneSize(zd, zn, zm)) { - op = ASR_z_p_zz; - } - Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::asrd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - // ASRD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> - // 0000 0100 ..00 0100 100. .... .... .... - // tszh<23:22> | opc<19:18> = 01 | L<17> = 0 | U<16> = 0 | Pg<12:10> | - // tszl<9:8> | imm3<7:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - - Instr encoded_imm = - EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); - SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASRD_z_p_zi); -} - -void Assembler::asrr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0100 100. .... .... .... 
- // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | - // Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(ASRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> - // 0000 0100 ..00 0011 100. .... .... .... - // tszh<23:22> | opc<19:18> = 00 | L<17> = 1 | U<16> = 1 | Pg<12:10> | - // tszl<9:8> | imm3<7:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - - Instr encoded_imm = - EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); - SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSL_z_p_zi); -} - -void Assembler::lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D - // 0000 0100 ..01 1011 100. .... .... .... - // size<23:22> | R<18> = 0 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | - // Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || - ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && - (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); - Instr op = LSL_z_p_zw; - if (AreSameLaneSize(zd, zn, zm)) { - op = LSL_z_p_zz; - } - Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::lslr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0111 100. .... .... .... 
- // size<23:22> | R<18> = 1 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | - // Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(LSLR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> - // 0000 0100 ..00 0001 100. .... .... .... - // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 1 | Pg<12:10> | - // tszl<9:8> | imm3<7:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - - Instr encoded_imm = - EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); - SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSR_z_p_zi); -} - -void Assembler::lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D - // 0000 0100 ..01 1001 100. .... .... .... - // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | - // Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || - ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && - (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); - Instr op = LSR_z_p_zw; - if (AreSameLaneSize(zd, zn, zm)) { - op = LSR_z_p_zz; - } - Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::lsrr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0101 100. .... .... .... 
- // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | - // Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(LSRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -// SVEBitwiseShiftUnpredicated. - -Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op, - int shift, - int lane_size_in_bits) { - if (shift_op == LSL) { - VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); - return lane_size_in_bits + shift; - } - - VIXL_ASSERT((shift_op == ASR) || (shift_op == LSR)); - VIXL_ASSERT((shift > 0) && (shift <= lane_size_in_bits)); - return (2 * lane_size_in_bits) - shift; -} - -void Assembler::SVEBitwiseShiftImmediate(const ZRegister& zd, - const ZRegister& zn, - Instr encoded_imm_and_tsz, - SVEBitwiseShiftUnpredicatedOp op) { - Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) - << 16; - Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; - Emit(op | tszh | tszl_and_imm | Rd(zd) | Rn(zn)); -} - -void Assembler::asr(const ZRegister& zd, const ZRegister& zn, int shift) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - Instr encoded_imm = - EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); - SVEBitwiseShiftImmediate(zd, zn, encoded_imm, ASR_z_zi); -} - -void Assembler::asr(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); - - Emit(ASR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::lsl(const ZRegister& zd, const ZRegister& zn, int shift) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - Instr encoded_imm = - EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); - SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSL_z_zi); -} - -void Assembler::lsl(const ZRegister& zd, - 
const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); - - Emit(LSL_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::lsr(const ZRegister& zd, const ZRegister& zn, int shift) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - Instr encoded_imm = - EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); - SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSR_z_zi); -} - -void Assembler::lsr(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); - - Emit(LSR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -// SVEElementCount. - -#define VIXL_SVE_INC_DEC_LIST(V) \ - V(cntb, CNTB_r_s) \ - V(cnth, CNTH_r_s) \ - V(cntw, CNTW_r_s) \ - V(cntd, CNTD_r_s) \ - V(decb, DECB_r_rs) \ - V(dech, DECH_r_rs) \ - V(decw, DECW_r_rs) \ - V(decd, DECD_r_rs) \ - V(incb, INCB_r_rs) \ - V(inch, INCH_r_rs) \ - V(incw, INCW_r_rs) \ - V(incd, INCD_r_rs) \ - V(sqdecb, SQDECB_r_rs_x) \ - V(sqdech, SQDECH_r_rs_x) \ - V(sqdecw, SQDECW_r_rs_x) \ - V(sqdecd, SQDECD_r_rs_x) \ - V(sqincb, SQINCB_r_rs_x) \ - V(sqinch, SQINCH_r_rs_x) \ - V(sqincw, SQINCW_r_rs_x) \ - V(sqincd, SQINCD_r_rs_x) - -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ - void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(rdn.IsX()); \ - Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ - ImmUnsignedField<19, 16>(multiplier - 1)); \ - } -VIXL_SVE_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC - -#define VIXL_SVE_UQINC_UQDEC_LIST(V) \ - V(uqdecb, (rdn.IsX() ? UQDECB_r_rs_x : UQDECB_r_rs_uw)) \ - V(uqdech, (rdn.IsX() ? UQDECH_r_rs_x : UQDECH_r_rs_uw)) \ - V(uqdecw, (rdn.IsX() ? 
UQDECW_r_rs_x : UQDECW_r_rs_uw)) \ - V(uqdecd, (rdn.IsX() ? UQDECD_r_rs_x : UQDECD_r_rs_uw)) \ - V(uqincb, (rdn.IsX() ? UQINCB_r_rs_x : UQINCB_r_rs_uw)) \ - V(uqinch, (rdn.IsX() ? UQINCH_r_rs_x : UQINCH_r_rs_uw)) \ - V(uqincw, (rdn.IsX() ? UQINCW_r_rs_x : UQINCW_r_rs_uw)) \ - V(uqincd, (rdn.IsX() ? UQINCD_r_rs_x : UQINCD_r_rs_uw)) - -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ - void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ - ImmUnsignedField<19, 16>(multiplier - 1)); \ - } -VIXL_SVE_UQINC_UQDEC_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC - -#define VIXL_SVE_SQX_INC_DEC_LIST(V) \ - V(sqdecb, SQDECB) \ - V(sqdech, SQDECH) \ - V(sqdecw, SQDECW) \ - V(sqdecd, SQDECD) \ - V(sqincb, SQINCB) \ - V(sqinch, SQINCH) \ - V(sqincw, SQINCW) \ - V(sqincd, SQINCD) - -#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ - void Assembler::FN(const Register& xd, \ - const Register& wn, \ - int pattern, \ - int multiplier) { \ - USE(wn); \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(wn.IsW() && xd.Is(wn.X())); \ - Emit(OP##_r_rs_sx | Rd(xd) | ImmSVEPredicateConstraint(pattern) | \ - ImmUnsignedField<19, 16>(multiplier - 1)); \ - } -VIXL_SVE_SQX_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC - -#define VIXL_SVE_INC_DEC_VEC_LIST(V) \ - V(dech, DEC, H) \ - V(decw, DEC, W) \ - V(decd, DEC, D) \ - V(inch, INC, H) \ - V(incw, INC, W) \ - V(incd, INC, D) \ - V(sqdech, SQDEC, H) \ - V(sqdecw, SQDEC, W) \ - V(sqdecd, SQDEC, D) \ - V(sqinch, SQINC, H) \ - V(sqincw, SQINC, W) \ - V(sqincd, SQINC, D) \ - V(uqdech, UQDEC, H) \ - V(uqdecw, UQDEC, W) \ - V(uqdecd, UQDEC, D) \ - V(uqinch, UQINC, H) \ - V(uqincw, UQINC, W) \ - V(uqincd, UQINC, D) - -#define VIXL_DEFINE_ASM_FUNC(FN, OP, T) \ - void Assembler::FN(const ZRegister& zdn, int pattern, int multiplier) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(zdn.GetLaneSizeInBytes() 
== k##T##RegSizeInBytes); \ - Emit(OP##T##_z_zs | Rd(zdn) | ImmSVEPredicateConstraint(pattern) | \ - ImmUnsignedField<19, 16>(multiplier - 1)); \ - } -VIXL_SVE_INC_DEC_VEC_LIST(VIXL_DEFINE_ASM_FUNC) -#undef VIXL_DEFINE_ASM_FUNC - -// SVEFPAccumulatingReduction. - -void Assembler::fadda(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm) { - // FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> - // 0110 0101 ..01 1000 001. .... .... .... - // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Vdn<4:0> - - USE(vn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.Is(vn)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(zm.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(AreSameLaneSize(zm, vd)); - - Emit(FADDA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); -} - -// SVEFPArithmetic_Predicated. - -void Assembler::fabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 1000 100. .... .... .... - // size<23:22> | opc<19:16> = 1000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1000 100. ..00 00.. .... - // size<23:22> | opc<18:16> = 000 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); - - Instr i1 = (imm == 1.0) ? 
(1 << 5) : 0; - Emit(FADD_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0000 100. .... .... .... - // size<23:22> | opc<19:16> = 0000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 1101 100. .... .... .... - // size<23:22> | opc<19:16> = 1101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fdivr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 1100 100. .... .... .... - // size<23:22> | opc<19:16> = 1100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fmax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1110 100. ..00 00.. .... 
- // size<23:22> | opc<18:16> = 110 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); - - Instr i1 = (imm == 1.0) ? (1 << 5) : 0; - Emit(FMAX_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fmax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0110 100. .... .... .... - // size<23:22> | opc<19:16> = 0110 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fmaxnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1100 100. ..00 00.. .... - // size<23:22> | opc<18:16> = 100 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); - - Instr i1 = (imm == 1.0) ? (1 << 5) : 0; - Emit(FMAXNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fmaxnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0100 100. .... .... .... 
- // size<23:22> | opc<19:16> = 0100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMAXNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fmin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1111 100. ..00 00.. .... - // size<23:22> | opc<18:16> = 111 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); - - Instr i1 = (imm == 1.0) ? (1 << 5) : 0; - Emit(FMIN_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fmin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0111 100. .... .... .... - // size<23:22> | opc<19:16> = 0111 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fminnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1101 100. ..00 00.. .... 
- // size<23:22> | opc<18:16> = 101 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); - - Instr i1 = (imm == 1.0) ? (1 << 5) : 0; - Emit(FMINNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fminnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0101 100. .... .... .... - // size<23:22> | opc<19:16> = 0101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMINNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fmul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1010 100. ..00 00.. .... - // size<23:22> | opc<18:16> = 010 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT((imm == 0.5) || (imm == 2.0)); - - Instr i1 = (imm == 2.0) ? (1 << 5) : 0; - Emit(FMUL_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fmul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0010 100. .... .... .... 
- // size<23:22> | opc<19:16> = 0010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fmulx(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 1010 100. .... .... .... - // size<23:22> | opc<19:16> = 1010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMULX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fscale(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 1001 100. .... .... .... - // size<23:22> | opc<19:16> = 1001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FSCALE_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fsub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1001 100. ..00 00.. .... - // size<23:22> | opc<18:16> = 001 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); - - Instr i1 = (imm == 1.0) ? 
(1 << 5) : 0; - Emit(FSUB_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fsub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0001 100. .... .... .... - // size<23:22> | opc<19:16> = 0001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FSUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::fsubr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> - // 0110 0101 ..01 1011 100. ..00 00.. .... - // size<23:22> | opc<18:16> = 011 | Pg<12:10> | i1<5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); - - Instr i1 = (imm == 1.0) ? (1 << 5) : 0; - Emit(FSUBR_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); -} - -void Assembler::fsubr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0110 0101 ..00 0011 100. .... .... .... - // size<23:22> | opc<19:16> = 0011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FSUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::ftmad(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - int imm3) { - // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> - // 0110 0101 ..01 0... 1000 00.. .... .... 
- // size<23:22> | imm3<18:16> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FTMAD_z_zzi | SVESize(zd) | Rd(zd) | Rn(zm) | - ImmUnsignedField<18, 16>(imm3)); -} - -// SVEFPArithmeticUnpredicated. - -void Assembler::fadd(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 0000 00.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::fmul(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 0000 10.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::frecps(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 0001 10.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRECPS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::frsqrts(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 0001 11.. .... .... 
- // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRSQRTS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::fsub(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 0000 01.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::ftsmul(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 0000 11.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FTSMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -// SVEFPCompareVectors. - -void Assembler::facge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 110. .... ...1 .... - // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | - // o3<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FACGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::facgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 111. .... 
...1 .... - // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | - // o3<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FACGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fcmeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 011. .... ...0 .... - // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | - // o3<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FCMEQ_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fcmge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 010. .... ...0 .... - // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | - // o3<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FCMGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fcmgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 010. .... ...1 .... 
- // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | - // o3<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FCMGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fcmne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 011. .... ...1 .... - // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | - // o3<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FCMNE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fcmuo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..0. .... 110. .... ...0 .... - // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | - // o3<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FCMUO_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -// SVEFPCompareWithZero. - -void Assembler::fcmeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 - // 0110 0101 ..01 0010 001. .... ...0 .... 
- // size<23:22> | eq<17> = 1 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(zero == 0.0); - USE(zero); - - Emit(FCMEQ_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fcmge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 - // 0110 0101 ..01 0000 001. .... ...0 .... - // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(zero == 0.0); - USE(zero); - - Emit(FCMGE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fcmgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 - // 0110 0101 ..01 0000 001. .... ...1 .... - // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(zero == 0.0); - USE(zero); - - Emit(FCMGT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fcmle(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 - // 0110 0101 ..01 0001 001. .... ...1 .... 
- // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(zero == 0.0); - USE(zero); - - Emit(FCMLE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fcmlt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 - // 0110 0101 ..01 0001 001. .... ...0 .... - // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(zero == 0.0); - USE(zero); - - Emit(FCMLT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fcmne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 - // 0110 0101 ..01 0011 001. .... ...0 .... - // size<23:22> | eq<17> = 1 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(zero == 0.0); - USE(zero); - - Emit(FCMNE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); -} - -// SVEFPComplexAddition. - -void Assembler::fcadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - int rot) { - // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> - // 0110 0100 ..00 000. 100. .... .... .... - // size<23:22> | rot<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT((rot == 90) || (rot == 270)); - - Instr rotate_bit = (rot == 90) ? 
0 : (1 << 16); - Emit(FCADD_z_p_zz | rotate_bit | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -// SVEFPComplexMulAdd. - -void Assembler::fcmla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - int rot) { - // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> - // 0110 0100 ..0. .... 0... .... .... .... - // size<23:22> | Zm<20:16> | rot<14:13> | Pg<12:10> | Zn<9:5> | Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); - - Instr rotate_bit = (rot / 90) << 13; - Emit(FCMLA_z_p_zzz | rotate_bit | SVESize(zda) | Rd(zda) | PgLow8(pg) | - Rn(zn) | Rm(zm)); -} - -// SVEFPComplexMulAddIndex. - -void Assembler::fcmla(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index, - int rot) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); - VIXL_ASSERT(index >= 0); - - int lane_size = zda.GetLaneSizeInBytes(); - - Instr zm_and_idx = 0; - Instr op = FCMLA_z_zzzi_h; - if (lane_size == kHRegSizeInBytes) { - // Zm<18:16> | i2<20:19> - VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3)); - zm_and_idx = (index << 19) | Rx<18, 16>(zm); - } else { - // Zm<19:16> | i1<20> - VIXL_ASSERT(lane_size == kSRegSizeInBytes); - VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1)); - zm_and_idx = (index << 20) | Rx<19, 16>(zm); - op = FCMLA_z_zzzi_s; - } - - Instr rotate_bit = (rot / 90) << 10; - Emit(op | zm_and_idx | rotate_bit | Rd(zda) | Rn(zn)); -} - -// SVEFPFastReduction. - -void Assembler::faddv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - // FADDV <V><d>, <Pg>, <Zn>.<T> - // 0110 0101 ..00 0000 001. .... .... .... 
- // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Vd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(AreSameLaneSize(zn, vd)); - - Emit(FADDV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fmaxnmv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - // FMAXNMV <V><d>, <Pg>, <Zn>.<T> - // 0110 0101 ..00 0100 001. .... .... .... - // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Vd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(AreSameLaneSize(zn, vd)); - - Emit(FMAXNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fmaxv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - // FMAXV <V><d>, <Pg>, <Zn>.<T> - // 0110 0101 ..00 0110 001. .... .... .... - // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Vd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(AreSameLaneSize(zn, vd)); - - Emit(FMAXV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fminnmv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - // FMINNMV <V><d>, <Pg>, <Zn>.<T> - // 0110 0101 ..00 0101 001. .... .... .... - // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Vd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(AreSameLaneSize(zn, vd)); - - Emit(FMINNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fminv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - // FMINV <V><d>, <Pg>, <Zn>.<T> - // 0110 0101 ..00 0111 001. .... .... .... 
- // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Vd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(AreSameLaneSize(zn, vd)); - - Emit(FMINV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -// SVEFPMulAdd. - -void Assembler::fmad(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> - // 0110 0101 ..1. .... 100. .... .... .... - // size<23:22> | Za<20:16> | opc<14:13> = 00 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); -} - -void Assembler::fmla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..1. .... 000. .... .... .... - // size<23:22> | Zm<20:16> | opc<14:13> = 00 | Pg<12:10> | Zn<9:5> | Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fmls(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..1. .... 001. .... .... .... 
- // size<23:22> | Zm<20:16> | opc<14:13> = 01 | Pg<12:10> | Zn<9:5> | Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fmsb(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> - // 0110 0101 ..1. .... 101. .... .... .... - // size<23:22> | Za<20:16> | opc<14:13> = 01 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); -} - -void Assembler::fnmad(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> - // 0110 0101 ..1. .... 110. .... .... .... - // size<23:22> | Za<20:16> | opc<14:13> = 10 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FNMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); -} - -void Assembler::fnmla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..1. .... 010. .... .... .... 
- // size<23:22> | Zm<20:16> | opc<14:13> = 10 | Pg<12:10> | Zn<9:5> | Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FNMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fnmls(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> - // 0110 0101 ..1. .... 011. .... .... .... - // size<23:22> | Zm<20:16> | opc<14:13> = 11 | Pg<12:10> | Zn<9:5> | Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FNMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::fnmsb(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> - // 0110 0101 ..1. .... 111. .... .... .... - // size<23:22> | Za<20:16> | opc<14:13> = 11 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FNMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); -} - -Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, - const ZRegister& zm, - int index, - Instr op_h, - Instr op_s, - Instr op_d) { - Instr size = lane_size_in_bytes_log2 << SVESize_offset; - Instr zm_with_index = Rm(zm); - Instr op = 0xffffffff; - // Allowable register number and lane index depends on the lane size. - switch (lane_size_in_bytes_log2) { - case kHRegSizeInBytesLog2: - VIXL_ASSERT(zm.GetCode() <= 7); - VIXL_ASSERT(IsUint3(index)); - // For H-sized lanes, size is encoded as 0b0x, where x is used as the top - // bit of the index. 
So, if index is less than four, the top bit of index - // is zero, and therefore size is 0b00. Otherwise, it's 0b01, the usual - // encoding for H-sized lanes. - if (index < 4) size = 0; - // Top two bits of "zm" encode the index. - zm_with_index |= (index & 3) << (Rm_offset + 3); - op = op_h; - break; - case kSRegSizeInBytesLog2: - VIXL_ASSERT(zm.GetCode() <= 7); - VIXL_ASSERT(IsUint2(index)); - // Top two bits of "zm" encode the index. - zm_with_index |= (index & 3) << (Rm_offset + 3); - op = op_s; - break; - case kDRegSizeInBytesLog2: - VIXL_ASSERT(zm.GetCode() <= 15); - VIXL_ASSERT(IsUint1(index)); - // Top bit of "zm" encodes the index. - zm_with_index |= (index & 1) << (Rm_offset + 4); - op = op_d; - break; - default: - VIXL_UNIMPLEMENTED(); - } - return op | zm_with_index | size; -} - -// SVEFPMulAddIndex. - -void Assembler::fmla(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - - // The encoding of opcode, index, Zm, and size are synthesized in this - // variable. - Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), - zm, - index, - FMLA_z_zzzi_h, - FMLA_z_zzzi_s, - FMLA_z_zzzi_d); - - Emit(synthesized_op | Rd(zda) | Rn(zn)); -} - -void Assembler::fmls(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - - // The encoding of opcode, index, Zm, and size are synthesized in this - // variable. - Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), - zm, - index, - FMLS_z_zzzi_h, - FMLS_z_zzzi_s, - FMLS_z_zzzi_d); - - Emit(synthesized_op | Rd(zda) | Rn(zn)); -} - -// SVEFPMulIndex. 
- -// This prototype maps to 3 instruction encodings: -void Assembler::fmul(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned index) { - // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>[<imm>] - // 0110 0100 ..1. .... 0010 00.. .... .... - // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - // The encoding of opcode, index, Zm, and size are synthesized in this - // variable. - Instr synthesized_op = SVEFPMulIndexHelper(zd.GetLaneSizeInBytesLog2(), - zm, - index, - FMUL_z_zzi_h, - FMUL_z_zzi_s, - FMUL_z_zzi_d); - - Emit(synthesized_op | Rd(zd) | Rn(zn)); -} - -// SVEFPUnaryOpPredicated. - -void Assembler::fcvt(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Instr op = 0xffffffff; - switch (zn.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kSRegSizeInBytes: - op = FCVT_z_p_z_h2s; - break; - case kDRegSizeInBytes: - op = FCVT_z_p_z_h2d; - break; - } - break; - case kSRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = FCVT_z_p_z_s2h; - break; - case kDRegSizeInBytes: - op = FCVT_z_p_z_s2d; - break; - } - break; - case kDRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = FCVT_z_p_z_d2h; - break; - case kSRegSizeInBytes: - op = FCVT_z_p_z_d2s; - break; - } - break; - } - VIXL_ASSERT(op != 0xffffffff); - - Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fcvtzs(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - Instr op = 0xffffffff; - switch (zn.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = FCVTZS_z_p_z_fp162h; - break; - case kSRegSizeInBytes: - op = FCVTZS_z_p_z_fp162w; - break; - case kDRegSizeInBytes: - op = FCVTZS_z_p_z_fp162x; - break; - 
} - break; - case kSRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kSRegSizeInBytes: - op = FCVTZS_z_p_z_s2w; - break; - case kDRegSizeInBytes: - op = FCVTZS_z_p_z_s2x; - break; - } - break; - case kDRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kSRegSizeInBytes: - op = FCVTZS_z_p_z_d2w; - break; - case kDRegSizeInBytes: - op = FCVTZS_z_p_z_d2x; - break; - } - break; - } - VIXL_ASSERT(op != 0xffffffff); - - Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fcvtzu(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - Instr op = 0xffffffff; - switch (zn.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = FCVTZU_z_p_z_fp162h; - break; - case kSRegSizeInBytes: - op = FCVTZU_z_p_z_fp162w; - break; - case kDRegSizeInBytes: - op = FCVTZU_z_p_z_fp162x; - break; - } - break; - case kSRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kSRegSizeInBytes: - op = FCVTZU_z_p_z_s2w; - break; - case kDRegSizeInBytes: - op = FCVTZU_z_p_z_s2x; - break; - } - break; - case kDRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kSRegSizeInBytes: - op = FCVTZU_z_p_z_d2w; - break; - case kDRegSizeInBytes: - op = FCVTZU_z_p_z_d2x; - break; - } - break; - } - VIXL_ASSERT(op != 0xffffffff); - - Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frecpx(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0110 0101 ..00 1100 101. .... .... .... 
- // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRECPX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frinta(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRINTA_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frinti(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRINTI_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frintm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRINTM_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frintn(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRINTN_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frintp(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRINTP_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frintx(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - 
VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRINTX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::frintz(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRINTZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fsqrt(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // FSQRT <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0110 0101 ..00 1101 101. .... .... .... - // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FSQRT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::scvtf(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - Instr op = 0xffffffff; - switch (zn.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = SCVTF_z_p_z_h2fp16; - break; - } - break; - case kSRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = SCVTF_z_p_z_w2fp16; - break; - case kSRegSizeInBytes: - op = SCVTF_z_p_z_w2s; - break; - case kDRegSizeInBytes: - op = SCVTF_z_p_z_w2d; - break; - } - break; - case kDRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = SCVTF_z_p_z_x2fp16; - break; - case kSRegSizeInBytes: - op = SCVTF_z_p_z_x2s; - break; - case kDRegSizeInBytes: - op = SCVTF_z_p_z_x2d; - break; - } - break; - } - VIXL_ASSERT(op != 0xffffffff); - - Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::ucvtf(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - 
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - Instr op = 0xffffffff; - switch (zn.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = UCVTF_z_p_z_h2fp16; - break; - } - break; - case kSRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = UCVTF_z_p_z_w2fp16; - break; - case kSRegSizeInBytes: - op = UCVTF_z_p_z_w2s; - break; - case kDRegSizeInBytes: - op = UCVTF_z_p_z_w2d; - break; - } - break; - case kDRegSizeInBytes: - switch (zd.GetLaneSizeInBytes()) { - case kHRegSizeInBytes: - op = UCVTF_z_p_z_x2fp16; - break; - case kSRegSizeInBytes: - op = UCVTF_z_p_z_x2s; - break; - case kDRegSizeInBytes: - op = UCVTF_z_p_z_x2d; - break; - } - break; - } - VIXL_ASSERT(op != 0xffffffff); - - Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -// SVEFPUnaryOpUnpredicated. - -void Assembler::frecpe(const ZRegister& zd, const ZRegister& zn) { - // FRECPE <Zd>.<T>, <Zn>.<T> - // 0110 0101 ..00 1110 0011 00.. .... .... - // size<23:22> | opc<18:16> = 110 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRECPE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -void Assembler::frsqrte(const ZRegister& zd, const ZRegister& zn) { - // FRSQRTE <Zd>.<T>, <Zn>.<T> - // 0110 0101 ..00 1111 0011 00.. .... .... - // size<23:22> | opc<18:16> = 111 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FRSQRTE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -// SVEIncDecByPredicateCount. - -void Assembler::decp(const Register& rdn, const PRegisterWithLaneSize& pg) { - // DECP <Xdn>, <Pg>.<T> - // 0010 0101 ..10 1101 1000 100. .... .... 
- // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | - // Rdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(rdn.IsX()); - - Emit(DECP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); -} - -void Assembler::decp(const ZRegister& zdn, const PRegister& pg) { - // DECP <Zdn>.<T>, <Pg> - // 0010 0101 ..10 1101 1000 000. .... .... - // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | - // Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(pg.IsUnqualified()); - - Emit(DECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); -} - -void Assembler::incp(const Register& rdn, const PRegisterWithLaneSize& pg) { - // INCP <Xdn>, <Pg>.<T> - // 0010 0101 ..10 1100 1000 100. .... .... - // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | - // Rdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(rdn.IsX()); - - Emit(INCP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); -} - -void Assembler::incp(const ZRegister& zdn, const PRegister& pg) { - // INCP <Zdn>.<T>, <Pg> - // 0010 0101 ..10 1100 1000 000. .... .... - // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | - // Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(pg.IsUnqualified()); - - Emit(INCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); -} - -void Assembler::sqdecp(const Register& xd, - const PRegisterWithLaneSize& pg, - const Register& wn) { - // SQDECP <Xdn>, <Pg>.<T>, <Wdn> - // 0010 0101 ..10 1010 1000 100. .... .... 
- // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | - // Rdn<4:0> - - USE(wn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); - - Emit(SQDECP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); -} - -void Assembler::sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) { - // SQDECP <Xdn>, <Pg>.<T> - // 0010 0101 ..10 1010 1000 110. .... .... - // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | - // Rdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xdn.IsX()); - - Emit(SQDECP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); -} - -void Assembler::sqdecp(const ZRegister& zdn, const PRegister& pg) { - // SQDECP <Zdn>.<T>, <Pg> - // 0010 0101 ..10 1010 1000 000. .... .... - // size<23:22> | D<17> = 1 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(pg.IsUnqualified()); - - Emit(SQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); -} - -void Assembler::sqincp(const Register& xd, - const PRegisterWithLaneSize& pg, - const Register& wn) { - // SQINCP <Xdn>, <Pg>.<T>, <Wdn> - // 0010 0101 ..10 1000 1000 100. .... .... - // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | - // Rdn<4:0> - - USE(wn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); - - Emit(SQINCP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); -} - -void Assembler::sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) { - // SQINCP <Xdn>, <Pg>.<T> - // 0010 0101 ..10 1000 1000 110. .... .... 
- // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | - // Rdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xdn.IsX()); - - Emit(SQINCP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); -} - -void Assembler::sqincp(const ZRegister& zdn, const PRegister& pg) { - // SQINCP <Zdn>.<T>, <Pg> - // 0010 0101 ..10 1000 1000 000. .... .... - // size<23:22> | D<17> = 0 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(pg.IsUnqualified()); - - Emit(SQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); -} - -void Assembler::uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { - // UQDECP <Wdn>, <Pg>.<T> - // UQDECP <Xdn>, <Pg>.<T> - // 0010 0101 ..10 1011 1000 10.. .... .... - // size<23:22> | D<17> = 1 | U<16> = 1 | sf<10> | op<9> = 0 | Pg<8:5> | - // Rdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Instr op = rdn.IsX() ? UQDECP_r_p_r_x : UQDECP_r_p_r_uw; - Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); -} - -void Assembler::uqdecp(const ZRegister& zdn, const PRegister& pg) { - // UQDECP <Zdn>.<T>, <Pg> - // 0010 0101 ..10 1011 1000 000. .... .... - // size<23:22> | D<17> = 1 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(pg.IsUnqualified()); - - Emit(UQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); -} - -void Assembler::uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { - // UQINCP <Wdn>, <Pg>.<T> - // 0010 0101 ..10 1001 1000 100. .... .... - // size<23:22> | D<17> = 0 | U<16> = 1 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | - // Rdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Instr op = rdn.IsX() ? 
UQINCP_r_p_r_x : UQINCP_r_p_r_uw; - Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); -} - -void Assembler::uqincp(const ZRegister& zdn, const PRegister& pg) { - // UQINCP <Zdn>.<T>, <Pg> - // 0010 0101 ..10 1001 1000 000. .... .... - // size<23:22> | D<17> = 0 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); - VIXL_ASSERT(pg.IsUnqualified()); - - Emit(UQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); -} - -// SVEIndexGeneration. - -void Assembler::index(const ZRegister& zd, int start, int step) { - // INDEX <Zd>.<T>, #<imm1>, #<imm2> - // 0000 0100 ..1. .... 0100 00.. .... .... - // size<23:22> | step<20:16> | start<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(INDEX_z_ii | SVESize(zd) | ImmField<20, 16>(step) | - ImmField<9, 5>(start) | Rd(zd)); -} - -void Assembler::index(const ZRegister& zd, - const Register& rn, - const Register& rm) { - // INDEX <Zd>.<T>, <R><n>, <R><m> - // 0000 0100 ..1. .... 0100 11.. .... .... - // size<23:22> | Rm<20:16> | Rn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= - zd.GetLaneSizeInBits()); - VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >= - zd.GetLaneSizeInBits()); - - Emit(INDEX_z_rr | SVESize(zd) | Rd(zd) | Rn(rn) | Rm(rm)); -} - -void Assembler::index(const ZRegister& zd, const Register& rn, int imm5) { - // INDEX <Zd>.<T>, <R><n>, #<imm> - // 0000 0100 ..1. .... 0100 01.. .... .... - // size<23:22> | imm5<20:16> | Rn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= - zd.GetLaneSizeInBits()); - - Emit(INDEX_z_ri | SVESize(zd) | Rd(zd) | Rn(rn) | ImmField<20, 16>(imm5)); -} - -void Assembler::index(const ZRegister& zd, int imm5, const Register& rm) { - // INDEX <Zd>.<T>, #<imm>, <R><m> - // 0000 0100 ..1. .... 0100 10.. .... .... 
- // size<23:22> | Rm<20:16> | imm5<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >= - zd.GetLaneSizeInBits()); - - Emit(INDEX_z_ir | SVESize(zd) | Rd(zd) | ImmField<9, 5>(imm5) | Rm(rm)); -} - -// SVEIntArithmeticUnpredicated. - -void Assembler::add(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..1. .... 0000 00.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(ADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::sqadd(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..1. .... 0001 00.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(SQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::sqsub(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..1. .... 0001 10.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(SQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::sub(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..1. .... 0000 01.. .... .... 
- // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(SUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::uqadd(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..1. .... 0001 01.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(UQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::uqsub(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..1. .... 0001 11.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(UQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -// SVEIntBinaryArithmeticPredicated. - -void Assembler::add(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 0000 000. .... .... .... - // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(ADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::and_(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 1010 000. .... .... .... 
- // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(AND_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::bic(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 1011 000. .... .... .... - // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(BIC_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::eor(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 1001 000. .... .... .... - // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(EOR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::mul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // MUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0000 000. .... .... .... - // size<23:22> | H<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(MUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::orr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 1000 000. .... .... .... 
- // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(ORR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::sabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 1100 000. .... .... .... - // size<23:22> | opc<18:17> = 10 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(SABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::sdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0100 000. .... .... .... - // size<23:22> | R<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); - - Emit(SDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::sdivr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0110 000. .... .... .... 
- // size<23:22> | R<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); - - Emit(SDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::smax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 1000 000. .... .... .... - // size<23:22> | opc<18:17> = 00 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(SMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::smin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 1010 000. .... .... .... - // size<23:22> | opc<18:17> = 01 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(SMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::smulh(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0010 000. .... .... .... - // size<23:22> | H<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(SMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::sub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 0001 000. .... .... .... 
- // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(SUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::subr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 0011 000. .... .... .... - // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(SUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::uabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 1101 000. .... .... .... - // size<23:22> | opc<18:17> = 10 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(UABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::udiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // UDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0101 000. .... .... .... - // size<23:22> | R<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); - - Emit(UDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::udivr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // UDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0111 000. .... .... .... 
- // size<23:22> | R<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); - - Emit(UDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::umax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 1001 000. .... .... .... - // size<23:22> | opc<18:17> = 00 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(UMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::umin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..00 1011 000. .... .... .... - // size<23:22> | opc<18:17> = 01 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(UMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::umulh(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // UMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> - // 0000 0100 ..01 0011 000. .... .... .... - // size<23:22> | H<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(UMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -// SVEIntCompareScalars. - -void Assembler::ctermeq(const Register& rn, const Register& rm) { - // CTERMEQ <R><n>, <R><m> - // 0010 0101 1.1. .... 0010 00.. 
...0 0000 - // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 0 - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameSizeAndType(rn, rm)); - const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; - - Emit(CTERMEQ_rr | sz | Rn(rn) | Rm(rm)); -} - -void Assembler::ctermne(const Register& rn, const Register& rm) { - // CTERMNE <R><n>, <R><m> - // 0010 0101 1.1. .... 0010 00.. ...1 0000 - // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 1 - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameSizeAndType(rn, rm)); - const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; - - Emit(CTERMNE_rr | sz | Rn(rn) | Rm(rm)); -} - -void Assembler::whilele(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - // WHILELE <Pd>.<T>, <R><n>, <R><m> - // 0010 0101 ..1. .... 000. 01.. ...1 .... - // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | - // eq<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameSizeAndType(rn, rm)); - const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; - - Emit(WHILELE_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); -} - -void Assembler::whilelo(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - // WHILELO <Pd>.<T>, <R><n>, <R><m> - // 0010 0101 ..1. .... 000. 11.. ...0 .... - // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | - // eq<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameSizeAndType(rn, rm)); - const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; - - Emit(WHILELO_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); -} - -void Assembler::whilels(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - // WHILELS <Pd>.<T>, <R><n>, <R><m> - // 0010 0101 ..1. .... 000. 11.. ...1 .... 
- // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | - // eq<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameSizeAndType(rn, rm)); - const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; - - Emit(WHILELS_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); -} - -void Assembler::whilelt(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - // WHILELT <Pd>.<T>, <R><n>, <R><m> - // 0010 0101 ..1. .... 000. 01.. ...0 .... - // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | - // eq<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameSizeAndType(rn, rm)); - const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; - - Emit(WHILELT_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); -} - -void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm, - SVEIntCompareVectorsOp op) { - Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm, - SVEIntCompareSignedImmOp op) { - Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm)); -} - -void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm, - SVEIntCompareUnsignedImmOp op) { - Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | - ImmUnsignedField<20, 14>(imm)); -} - -void Assembler::cmp(Condition cond, - const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - switch (cond) { - case eq: - cmpeq(pd, pg, zn, zm); - break; - case ge: - cmpge(pd, pg, zn, zm); - break; - case gt: - cmpgt(pd, pg, zn, zm); - break; - case le: - cmple(pd, pg, zn, zm); - break; - case lt: - cmplt(pd, pg, zn, zm); - break; - case ne: - cmpne(pd, pg, zn, zm); - 
break; - case hi: - cmphi(pd, pg, zn, zm); - break; - case hs: - cmphs(pd, pg, zn, zm); - break; - case lo: - cmplo(pd, pg, zn, zm); - break; - case ls: - cmpls(pd, pg, zn, zm); - break; - default: - VIXL_UNREACHABLE(); - } -} - -// SVEIntCompareSignedImm. - -void Assembler::cmpeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0101 ..0. .... 100. .... ...0 .... - // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> - // | ne<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm5, CMPEQ_p_p_zi); -} - -void Assembler::cmpge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0101 ..0. .... 000. .... ...0 .... - // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> - // | ne<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm5, CMPGE_p_p_zi); -} - -void Assembler::cmpgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0101 ..0. .... 000. .... ...1 .... - // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> - // | ne<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm5, CMPGT_p_p_zi); -} - -void Assembler::cmple(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0101 ..0. .... 001. .... ...1 .... 
- // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> - // | ne<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm5, CMPLE_p_p_zi); -} - -void Assembler::cmplt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0101 ..0. .... 001. .... ...0 .... - // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> - // | ne<4> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm5, CMPLT_p_p_zi); -} - -void Assembler::cmpne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0101 ..0. .... 100. .... ...1 .... - // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> - // | ne<4> = 1 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm5, CMPNE_p_p_zi); -} - -// SVEIntCompareUnsignedImm. - -void Assembler::cmphi(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7) { - // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0100 ..1. .... ..0. .... ...1 .... - // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm7, CMPHI_p_p_zi); -} - -void Assembler::cmphs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7) { - // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0100 ..1. .... ..0. .... ...0 .... 
- // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm7, CMPHS_p_p_zi); -} - -void Assembler::cmplo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7) { - // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0100 ..1. .... ..1. .... ...0 .... - // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm7, CMPLO_p_p_zi); -} - -void Assembler::cmpls(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - unsigned imm7) { - // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> - // 0010 0100 ..1. .... ..1. .... ...1 .... - // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - - CompareVectors(pd, pg, zn, imm7, CMPLS_p_p_zi); -} - -// SVEIntCompareVectors. 
- -// This prototype maps to 2 instruction encodings: -// CMPEQ_p_p_zw -// CMPEQ_p_p_zz -void Assembler::cmpeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - SVEIntCompareVectorsOp op = CMPEQ_p_p_zz; - if (!AreSameLaneSize(zn, zm)) { - VIXL_ASSERT(zm.IsLaneSizeD()); - op = CMPEQ_p_p_zw; - } - CompareVectors(pd, pg, zn, zm, op); -} - -// This prototype maps to 2 instruction encodings: -// CMPGE_p_p_zw -// CMPGE_p_p_zz -void Assembler::cmpge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - SVEIntCompareVectorsOp op = CMPGE_p_p_zz; - if (!AreSameLaneSize(zn, zm)) { - VIXL_ASSERT(zm.IsLaneSizeD()); - op = CMPGE_p_p_zw; - } - CompareVectors(pd, pg, zn, zm, op); -} - -// This prototype maps to 2 instruction encodings: -// CMPGT_p_p_zw -// CMPGT_p_p_zz -void Assembler::cmpgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - SVEIntCompareVectorsOp op = CMPGT_p_p_zz; - if (!AreSameLaneSize(zn, zm)) { - VIXL_ASSERT(zm.IsLaneSizeD()); - op = CMPGT_p_p_zw; - } - CompareVectors(pd, pg, zn, zm, op); -} - -// This prototype maps to 2 instruction encodings: -// CMPHI_p_p_zw -// CMPHI_p_p_zz -void Assembler::cmphi(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - SVEIntCompareVectorsOp op = CMPHI_p_p_zz; - if (!AreSameLaneSize(zn, zm)) { - VIXL_ASSERT(zm.IsLaneSizeD()); - op = CMPHI_p_p_zw; - } - CompareVectors(pd, pg, zn, zm, op); -} - -// This prototype maps to 2 instruction encodings: -// CMPHS_p_p_zw -// CMPHS_p_p_zz -void 
Assembler::cmphs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - SVEIntCompareVectorsOp op = CMPHS_p_p_zz; - if (!AreSameLaneSize(zn, zm)) { - VIXL_ASSERT(zm.IsLaneSizeD()); - op = CMPHS_p_p_zw; - } - CompareVectors(pd, pg, zn, zm, op); -} - -void Assembler::cmple(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - if (AreSameLaneSize(zn, zm)) { - cmpge(pd, pg, zm, zn); - return; - } - VIXL_ASSERT(zm.IsLaneSizeD()); - VIXL_ASSERT(!zn.IsLaneSizeD()); - - CompareVectors(pd, pg, zn, zm, CMPLE_p_p_zw); -} - -void Assembler::cmplo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - if (AreSameLaneSize(zn, zm)) { - cmphi(pd, pg, zm, zn); - return; - } - VIXL_ASSERT(zm.IsLaneSizeD()); - VIXL_ASSERT(!zn.IsLaneSizeD()); - - CompareVectors(pd, pg, zn, zm, CMPLO_p_p_zw); -} - -void Assembler::cmpls(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - if (AreSameLaneSize(zn, zm)) { - cmphs(pd, pg, zm, zn); - return; - } - VIXL_ASSERT(zm.IsLaneSizeD()); - VIXL_ASSERT(!zn.IsLaneSizeD()); - - CompareVectors(pd, pg, zn, zm, CMPLS_p_p_zw); -} - -void Assembler::cmplt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - if (AreSameLaneSize(zn, zm)) { - cmpgt(pd, pg, zm, zn); - return; - } - VIXL_ASSERT(zm.IsLaneSizeD()); - VIXL_ASSERT(!zn.IsLaneSizeD()); - - CompareVectors(pd, pg, zn, zm, CMPLT_p_p_zw); -} - 
-// This prototype maps to 2 instruction encodings: -// CMPNE_p_p_zw -// CMPNE_p_p_zz -void Assembler::cmpne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, zn)); - SVEIntCompareVectorsOp op = CMPNE_p_p_zz; - if (!AreSameLaneSize(zn, zm)) { - VIXL_ASSERT(zm.IsLaneSizeD()); - op = CMPNE_p_p_zw; - } - CompareVectors(pd, pg, zn, zm, op); -} - -// SVEIntMiscUnpredicated. - -void Assembler::fexpa(const ZRegister& zd, const ZRegister& zn) { - // FEXPA <Zd>.<T>, <Zn>.<T> - // 0000 0100 ..10 0000 1011 10.. .... .... - // size<23:22> | opc<20:16> = 00000 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FEXPA_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -void Assembler::ftssel(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..1. .... 1011 00.. .... .... - // size<23:22> | Zm<20:16> | op<10> = 0 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FTSSEL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::movprfx(const ZRegister& zd, const ZRegister& zn) { - // MOVPRFX <Zd>, <Zn> - // 0000 0100 0010 0000 1011 11.. .... .... - // opc<23:22> = 00 | opc2<20:16> = 00000 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(MOVPRFX_z_z | Rd(zd) | Rn(zn)); -} - -// SVEIntMulAddPredicated. - -void Assembler::mad(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - // MAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> - // 0000 0100 ..0. .... 110. .... .... .... 
- // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Za<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); - - Emit(MAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); -} - -void Assembler::mla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // MLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..0. .... 010. .... .... .... - // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Zn<9:5> | Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - - Emit(MLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::mls(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - // MLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> - // 0000 0100 ..0. .... 011. .... .... .... - // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Zn<9:5> | Zda<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); - - Emit(MLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); -} - -void Assembler::msb(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - // MSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> - // 0000 0100 ..0. .... 111. .... .... .... - // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Za<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); - - Emit(MSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); -} - -// SVEIntMulAddUnpredicated. 
- -void Assembler::sdot(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); - VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); - VIXL_ASSERT(AreSameLaneSize(zm, zn)); - - Emit(SDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); -} - -void Assembler::udot(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); - VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); - VIXL_ASSERT(AreSameLaneSize(zm, zn)); - - Emit(UDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); -} - -// SVEIntReduction. - -void Assembler::andv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(ANDV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::eorv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(EORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::movprfx(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn) { - // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> - // 0000 0100 ..01 000. 001. .... .... .... - // size<23:22> | opc<18:17> = 00 | M<16> | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); - VIXL_ASSERT(!pg.HasLaneSize()); - - Instr m = pg.IsMerging() ? 
0x00010000 : 0x00000000; - Emit(MOVPRFX_z_p_z | SVESize(zd) | m | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::orv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(ORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::saddv(const VRegister& dd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zn.GetLaneSizeInBytes() != kDRegSizeInBytes); - - Emit(SADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::smaxv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(SMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::sminv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(SMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::uaddv(const VRegister& dd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(UADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::umaxv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(UMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::uminv(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(UMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -// SVEIntUnaryArithmeticPredicated. - -void Assembler::abs(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 0110 101. .... .... .... 
- // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(ABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::cls(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 1000 101. .... .... .... - // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(CLS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::clz(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 1001 101. .... .... .... - // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(CLZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::cnot(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 1011 101. .... .... .... - // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(CNOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::cnt(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 1010 101. .... .... .... - // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(CNT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fabs(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 1100 101. .... .... .... 
- // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::fneg(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 1101 101. .... .... .... - // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Emit(FNEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::neg(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 0111 101. .... .... .... - // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(NEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::not_(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 1110 101. .... .... .... - // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(NOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::sxtb(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 0000 101. .... .... .... 
- // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); - - Emit(SXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::sxth(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 0010 101. .... .... .... - // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); - - Emit(SXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::sxtw(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // SXTW <Zd>.D, <Pg>/M, <Zn>.D - // 0000 0100 ..01 0100 101. .... .... .... - // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); - - Emit(SXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::uxtb(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 0001 101. .... .... .... - // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); - - Emit(UXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::uxth(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0100 ..01 0011 101. .... .... .... 
- // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); - - Emit(UXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::uxtw(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // UXTW <Zd>.D, <Pg>/M, <Zn>.D - // 0000 0100 ..01 0101 101. .... .... .... - // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); - - Emit(UXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -// SVEIntWideImmPredicated. - -void Assembler::cpy(const ZRegister& zd, - const PRegister& pg, - int imm8, - int shift) { - // CPY <Zd>.<T>, <Pg>/<ZM>, #<imm>{, <shift>} - // 0000 0101 ..01 .... 0... .... .... .... - // size<23:22> | Pg<19:16> | M<14> | sh<13> | imm8<12:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); - - ResolveSVEImm8Shift(&imm8, &shift); - - Instr sh = (shift > 0) ? (1 << 13) : 0; - Instr m = pg.IsMerging() ? (1 << 14) : 0; - Emit(CPY_z_p_i | m | sh | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | - ImmField<12, 5>(imm8)); -} - -void Assembler::fcpy(const ZRegister& zd, const PRegisterM& pg, double imm) { - // FCPY <Zd>.<T>, <Pg>/M, #<const> - // 0000 0101 ..01 .... 110. .... .... .... - // size<23:22> | Pg<19:16> | imm8<12:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Instr imm_field = ImmUnsignedField<12, 5>(FP64ToImm8(imm)); - Emit(FCPY_z_p_i | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | imm_field); -} - -// SVEIntAddSubtractImmUnpredicated. 
- -void Assembler::SVEIntAddSubtractImmUnpredicatedHelper( - SVEIntAddSubtractImm_UnpredicatedOp op, - const ZRegister& zd, - int imm8, - int shift) { - if (shift < 0) { - VIXL_ASSERT(shift == -1); - // Derive the shift amount from the immediate. - if (IsUint8(imm8)) { - shift = 0; - } else if (IsUint16(imm8) && ((imm8 % 256) == 0)) { - imm8 /= 256; - shift = 8; - } - } - - VIXL_ASSERT(IsUint8(imm8)); - VIXL_ASSERT((shift == 0) || (shift == 8)); - - Instr shift_bit = (shift > 0) ? (1 << 13) : 0; - Emit(op | SVESize(zd) | Rd(zd) | shift_bit | ImmUnsignedField<12, 5>(imm8)); -} - -void Assembler::add(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift) { - // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} - // 0010 0101 ..10 0000 11.. .... .... .... - // size<23:22> | opc<18:16> = 000 | sh<13> | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - SVEIntAddSubtractImmUnpredicatedHelper(ADD_z_zi, zd, imm8, shift); -} - -void Assembler::dup(const ZRegister& zd, int imm8, int shift) { - // DUP <Zd>.<T>, #<imm>{, <shift>} - // 0010 0101 ..11 1000 11.. .... .... .... - // size<23:22> | opc<18:17> = 00 | sh<13> | imm8<12:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - ResolveSVEImm8Shift(&imm8, &shift); - VIXL_ASSERT((shift < 8) || !zd.IsLaneSizeB()); - - Instr shift_bit = (shift > 0) ? (1 << 13) : 0; - Emit(DUP_z_i | SVESize(zd) | Rd(zd) | shift_bit | ImmField<12, 5>(imm8)); -} - -void Assembler::fdup(const ZRegister& zd, double imm) { - // FDUP <Zd>.<T>, #<const> - // 0010 0101 ..11 1001 110. .... .... .... 
- // size<23:22> | opc<18:17> = 00 | o2<13> = 0 | imm8<12:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); - - Instr encoded_imm = FP64ToImm8(imm) << 5; - Emit(FDUP_z_i | SVESize(zd) | encoded_imm | Rd(zd)); -} - -void Assembler::mul(const ZRegister& zd, const ZRegister& zn, int imm8) { - // MUL <Zdn>.<T>, <Zdn>.<T>, #<imm> - // 0010 0101 ..11 0000 110. .... .... .... - // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(MUL_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); -} - -void Assembler::smax(const ZRegister& zd, const ZRegister& zn, int imm8) { - // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> - // 0010 0101 ..10 1000 110. .... .... .... - // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(SMAX_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); -} - -void Assembler::smin(const ZRegister& zd, const ZRegister& zn, int imm8) { - // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> - // 0010 0101 ..10 1010 110. .... .... .... - // size<23:22> | opc<18:16> = 010 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(SMIN_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); -} - -void Assembler::sqadd(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift) { - // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} - // 0010 0101 ..10 0100 11.. .... .... .... 
- // size<23:22> | opc<18:16> = 100 | sh<13> | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - SVEIntAddSubtractImmUnpredicatedHelper(SQADD_z_zi, zd, imm8, shift); -} - -void Assembler::sqsub(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift) { - // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} - // 0010 0101 ..10 0110 11.. .... .... .... - // size<23:22> | opc<18:16> = 110 | sh<13> | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - SVEIntAddSubtractImmUnpredicatedHelper(SQSUB_z_zi, zd, imm8, shift); -} - -void Assembler::sub(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift) { - // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} - // 0010 0101 ..10 0001 11.. .... .... .... - // size<23:22> | opc<18:16> = 001 | sh<13> | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - SVEIntAddSubtractImmUnpredicatedHelper(SUB_z_zi, zd, imm8, shift); -} - -void Assembler::subr(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift) { - // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} - // 0010 0101 ..10 0011 11.. .... .... .... - // size<23:22> | opc<18:16> = 011 | sh<13> | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - SVEIntAddSubtractImmUnpredicatedHelper(SUBR_z_zi, zd, imm8, shift); -} - -void Assembler::umax(const ZRegister& zd, const ZRegister& zn, int imm8) { - // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> - // 0010 0101 ..10 1001 110. .... .... .... 
- // size<23:22> | opc<18:16> = 001 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(UMAX_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); -} - -void Assembler::umin(const ZRegister& zd, const ZRegister& zn, int imm8) { - // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> - // 0010 0101 ..10 1011 110. .... .... .... - // size<23:22> | opc<18:16> = 011 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(UMIN_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); -} - -void Assembler::uqadd(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift) { - // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} - // 0010 0101 ..10 0101 11.. .... .... .... - // size<23:22> | opc<18:16> = 101 | sh<13> | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - SVEIntAddSubtractImmUnpredicatedHelper(UQADD_z_zi, zd, imm8, shift); -} - -void Assembler::uqsub(const ZRegister& zd, - const ZRegister& zn, - int imm8, - int shift) { - // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} - // 0010 0101 ..10 0111 11.. .... .... .... - // size<23:22> | opc<18:16> = 111 | sh<13> | imm8<12:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - SVEIntAddSubtractImmUnpredicatedHelper(UQSUB_z_zi, zd, imm8, shift); -} - -// SVEMemLoad. 
- -void Assembler::SVELdSt1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr, - bool is_signed, - Instr op) { - VIXL_ASSERT(addr.IsContiguous()); - - Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr); - Instr dtype = - SVEDtype(msize_in_bytes_log2, zt.GetLaneSizeInBytesLog2(), is_signed); - Emit(op | mem_op | dtype | Rt(zt) | PgLow8(pg)); -} - -void Assembler::SVELdSt234Helper(int num_regs, - const ZRegister& zt1, - const PRegister& pg, - const SVEMemOperand& addr, - Instr op) { - VIXL_ASSERT((num_regs >= 2) && (num_regs <= 4)); - - unsigned msize_in_bytes_log2 = zt1.GetLaneSizeInBytesLog2(); - Instr num = (num_regs - 1) << 21; - Instr msz = msize_in_bytes_log2 << 23; - Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, num_regs, addr); - Emit(op | mem_op | msz | num | Rt(zt1) | PgLow8(pg)); -} - -void Assembler::SVELd1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - bool is_signed) { - VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); - if (is_signed) { - // Sign-extension is only possible when the vector elements are larger than - // the elements in memory. - VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); - } - - if (addr.IsScatterGather()) { - bool is_load = true; - bool is_ff = false; - SVEScatterGatherHelper(msize_in_bytes_log2, - zt, - pg, - addr, - is_load, - is_signed, - is_ff); - return; - } - - Instr op = 0xffffffff; - if (addr.IsScalarPlusImmediate()) { - op = SVEContiguousLoad_ScalarPlusImmFixed; - } else if (addr.IsScalarPlusScalar()) { - // Rm must not be xzr. 
- VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); - op = SVEContiguousLoad_ScalarPlusScalarFixed; - } else { - VIXL_UNIMPLEMENTED(); - } - SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); -} - -void Assembler::SVELdff1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - bool is_signed) { - VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); - if (is_signed) { - // Sign-extension is only possible when the vector elements are larger than - // the elements in memory. - VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); - } - - if (addr.IsScatterGather()) { - bool is_load = true; - bool is_ff = true; - SVEScatterGatherHelper(msize_in_bytes_log2, - zt, - pg, - addr, - is_load, - is_signed, - is_ff); - return; - } - - if (addr.IsPlainScalar()) { - // SVEMemOperand(x0) is treated as a scalar-plus-immediate form ([x0, #0]). - // In these instructions, we want to treat it as [x0, xzr]. - SVEMemOperand addr_scalar_plus_scalar(addr.GetScalarBase(), xzr); - // Guard against infinite recursion. 
- VIXL_ASSERT(!addr_scalar_plus_scalar.IsPlainScalar()); - SVELdff1Helper(msize_in_bytes_log2, - zt, - pg, - addr_scalar_plus_scalar, - is_signed); - return; - } - - Instr op = 0xffffffff; - if (addr.IsScalarPlusScalar()) { - op = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed; - } else { - VIXL_UNIMPLEMENTED(); - } - SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); -} - -void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr, - bool is_load, - bool is_signed, - bool is_first_fault) { - VIXL_ASSERT(addr.IsScatterGather()); - VIXL_ASSERT(zt.IsLaneSizeS() || zt.IsLaneSizeD()); - VIXL_ASSERT(is_load || !is_first_fault); - VIXL_ASSERT(is_load || !is_signed); - - Instr op = 0xffffffff; - if (addr.IsVectorPlusImmediate()) { - VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorBase())); - if (is_load) { - if (zt.IsLaneSizeS()) { - op = SVE32BitGatherLoad_VectorPlusImmFixed; - } else { - op = SVE64BitGatherLoad_VectorPlusImmFixed; - } - } else { - if (zt.IsLaneSizeS()) { - op = SVE32BitScatterStore_VectorPlusImmFixed; - } else { - op = SVE64BitScatterStore_VectorPlusImmFixed; - } - } - } else { - VIXL_ASSERT(addr.IsScalarPlusVector()); - VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorOffset())); - SVEOffsetModifier mod = addr.GetOffsetModifier(); - if (zt.IsLaneSizeS()) { - VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW)); - unsigned shift_amount = addr.GetShiftAmount(); - if (shift_amount == 0) { - if (is_load) { - op = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed; - } else { - op = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed; - } - } else if (shift_amount == 1) { - VIXL_ASSERT(msize_in_bytes_log2 == kHRegSizeInBytesLog2); - if (is_load) { - op = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed; - } else { - op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; - } - } else { - VIXL_ASSERT(shift_amount == 2); - 
VIXL_ASSERT(msize_in_bytes_log2 == kSRegSizeInBytesLog2); - if (is_load) { - op = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed; - } else { - op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; - } - } - } else if (zt.IsLaneSizeD()) { - switch (mod) { - case NO_SVE_OFFSET_MODIFIER: - if (is_load) { - op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed; - } else { - op = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed; - } - break; - case SVE_LSL: - if (is_load) { - op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed; - } else { - op = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed; - } - break; - case SVE_UXTW: - case SVE_SXTW: { - unsigned shift_amount = addr.GetShiftAmount(); - if (shift_amount == 0) { - if (is_load) { - op = - SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; - } else { - op = - SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; - } - } else { - VIXL_ASSERT(shift_amount == msize_in_bytes_log2); - if (is_load) { - op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed; - } else { - op = - SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed; - } - } - break; - } - default: - VIXL_UNIMPLEMENTED(); - } - } - } - - Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr, is_load); - Instr msz = ImmUnsignedField<24, 23>(msize_in_bytes_log2); - Instr u = (!is_load || is_signed) ? 0 : (1 << 14); - Instr ff = is_first_fault ? (1 << 13) : 0; - Emit(op | mem_op | msz | u | ff | Rt(zt) | PgLow8(pg)); -} - -void Assembler::SVELd234Helper(int num_regs, - const ZRegister& zt1, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - if (addr.IsScalarPlusScalar()) { - // Rm must not be xzr. 
- VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); - } - - Instr op; - if (addr.IsScalarPlusImmediate()) { - op = SVELoadMultipleStructures_ScalarPlusImmFixed; - } else if (addr.IsScalarPlusScalar()) { - op = SVELoadMultipleStructures_ScalarPlusScalarFixed; - } else { - // These instructions don't support any other addressing modes. - VIXL_ABORT(); - } - SVELdSt234Helper(num_regs, zt1, pg, addr, op); -} - -// SVEMemContiguousLoad. - -#define VIXL_DEFINE_LD1(MSZ, LANE_SIZE) \ - void Assembler::ld1##MSZ(const ZRegister& zt, \ - const PRegisterZ& pg, \ - const SVEMemOperand& addr) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ - } -#define VIXL_DEFINE_LD2(MSZ, LANE_SIZE) \ - void Assembler::ld2##MSZ(const ZRegister& zt1, \ - const ZRegister& zt2, \ - const PRegisterZ& pg, \ - const SVEMemOperand& addr) { \ - USE(zt2); \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ - VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ - VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ - SVELd234Helper(2, zt1, pg, addr); \ - } -#define VIXL_DEFINE_LD3(MSZ, LANE_SIZE) \ - void Assembler::ld3##MSZ(const ZRegister& zt1, \ - const ZRegister& zt2, \ - const ZRegister& zt3, \ - const PRegisterZ& pg, \ - const SVEMemOperand& addr) { \ - USE(zt2, zt3); \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ - VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ - VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ - SVELd234Helper(3, zt1, pg, addr); \ - } -#define VIXL_DEFINE_LD4(MSZ, LANE_SIZE) \ - void Assembler::ld4##MSZ(const ZRegister& zt1, \ - const ZRegister& zt2, \ - const ZRegister& zt3, \ - const ZRegister& zt4, \ - const PRegisterZ& pg, \ - const SVEMemOperand& addr) { \ - USE(zt2, zt3, zt4); \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ - VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ - 
VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ - SVELd234Helper(4, zt1, pg, addr); \ - } - -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD1) -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD2) -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD3) -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD4) - -#define VIXL_DEFINE_LD1S(MSZ, LANE_SIZE) \ - void Assembler::ld1s##MSZ(const ZRegister& zt, \ - const PRegisterZ& pg, \ - const SVEMemOperand& addr) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ - } -VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LD1S) - -// SVEMem32BitGatherAndUnsizedContiguous. - -void Assembler::SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - bool is_signed) { - VIXL_ASSERT(addr.IsScalarPlusImmediate()); - VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); - if (is_signed) { - // Sign-extension is only possible when the vector elements are larger than - // the elements in memory. 
- VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); - } - - int64_t imm = addr.GetImmediateOffset(); - int divisor = 1 << msize_in_bytes_log2; - VIXL_ASSERT(imm % divisor == 0); - Instr dtype = SVEDtypeSplit(msize_in_bytes_log2, - zt.GetLaneSizeInBytesLog2(), - is_signed); - - Emit(SVELoadAndBroadcastElementFixed | dtype | RnSP(addr.GetScalarBase()) | - ImmUnsignedField<21, 16>(imm / divisor) | Rt(zt) | PgLow8(pg)); -} - -// This prototype maps to 4 instruction encodings: -// LD1RB_z_p_bi_u16 -// LD1RB_z_p_bi_u32 -// LD1RB_z_p_bi_u64 -// LD1RB_z_p_bi_u8 -void Assembler::ld1rb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, false); -} - -// This prototype maps to 3 instruction encodings: -// LD1RH_z_p_bi_u16 -// LD1RH_z_p_bi_u32 -// LD1RH_z_p_bi_u64 -void Assembler::ld1rh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, false); -} - -// This prototype maps to 2 instruction encodings: -// LD1RW_z_p_bi_u32 -// LD1RW_z_p_bi_u64 -void Assembler::ld1rw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - SVELd1BroadcastHelper(kSRegSizeInBytesLog2, zt, pg, addr, false); -} - -void Assembler::ld1rd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - SVELd1BroadcastHelper(kDRegSizeInBytesLog2, zt, pg, addr, false); -} - -// This prototype maps to 3 instruction encodings: -// LD1RSB_z_p_bi_s16 -// LD1RSB_z_p_bi_s32 -// LD1RSB_z_p_bi_s64 -void Assembler::ld1rsb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, true); -} - -// This 
prototype maps to 2 instruction encodings: -// LD1RSH_z_p_bi_s32 -// LD1RSH_z_p_bi_s64 -void Assembler::ld1rsh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, true); -} - -void Assembler::ld1rsw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - SVELd1BroadcastHelper(kWRegSizeInBytesLog2, zt, pg, addr, true); -} - -void Assembler::ldr(const CPURegister& rt, const SVEMemOperand& addr) { - // LDR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}] - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - int64_t imm9 = addr.GetImmediateOffset(); - VIXL_ASSERT(IsInt9(imm9)); - Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10; - Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16; - - Instr op = LDR_z_bi; - if (rt.IsPRegister()) { - op = LDR_p_bi; - } - Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l); -} - -// SVEMem64BitGather. - -// This prototype maps to 3 instruction encodings: -// LDFF1B_z_p_bz_d_64_unscaled -// LDFF1B_z_p_bz_d_x32_unscaled -void Assembler::ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] - // 1100 0100 010. .... 111. .... .... .... 
- // msz<24:23> = 00 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> - // | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1B_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); -} - -// This prototype maps to 2 instruction encodings: -// LDFF1B_z_p_ai_d -// LDFF1B_z_p_ai_s -void Assembler::ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] - // 1100 0100 001. .... 111. .... .... .... - // msz<24:23> = 00 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | - // Zn<9:5> | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1B_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); -} - -// This prototype maps to 4 instruction encodings: -// LDFF1D_z_p_bz_d_64_scaled -// LDFF1D_z_p_bz_d_64_unscaled -// LDFF1D_z_p_bz_d_x32_scaled -// LDFF1D_z_p_bz_d_x32_unscaled -void Assembler::ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] - // 1100 0101 111. .... 111. .... .... .... - // msz<24:23> = 11 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> - // | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1D_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); -} - -void Assembler::ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] - // 1100 0101 101. .... 111. .... .... .... 
- // msz<24:23> = 11 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | - // Zn<9:5> | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1D_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); -} - -// This prototype maps to 6 instruction encodings: -// LDFF1H_z_p_bz_d_64_scaled -// LDFF1H_z_p_bz_d_64_unscaled -// LDFF1H_z_p_bz_d_x32_scaled -// LDFF1H_z_p_bz_d_x32_unscaled -void Assembler::ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] - // 1100 0100 111. .... 111. .... .... .... - // msz<24:23> = 01 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> - // | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1H_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); -} - -// This prototype maps to 2 instruction encodings: -// LDFF1H_z_p_ai_d -// LDFF1H_z_p_ai_s -void Assembler::ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] - // 1100 0100 101. .... 111. .... .... .... - // msz<24:23> = 01 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | - // Zn<9:5> | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1H_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); -} - -// This prototype maps to 3 instruction encodings: -// LDFF1SB_z_p_bz_d_64_unscaled -// LDFF1SB_z_p_bz_d_x32_unscaled -void Assembler::ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] - // 1100 0100 010. .... 101. .... .... .... 
- // msz<24:23> = 00 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> - // | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1SB_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); -} - -// This prototype maps to 2 instruction encodings: -// LDFF1SB_z_p_ai_d -// LDFF1SB_z_p_ai_s -void Assembler::ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] - // 1100 0100 001. .... 101. .... .... .... - // msz<24:23> = 00 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | - // Zn<9:5> | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1SB_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | - ImmField<20, 16>(imm5)); -} - -// This prototype maps to 6 instruction encodings: -// LDFF1SH_z_p_bz_d_64_scaled -// LDFF1SH_z_p_bz_d_64_unscaled -// LDFF1SH_z_p_bz_d_x32_scaled -// LDFF1SH_z_p_bz_d_x32_unscaled -void Assembler::ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] - // 1100 0100 111. .... 101. .... .... .... - // msz<24:23> = 01 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> - // | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1SH_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); -} - -// This prototype maps to 2 instruction encodings: -// LDFF1SH_z_p_ai_d -// LDFF1SH_z_p_ai_s -void Assembler::ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] - // 1100 0100 101. .... 101. .... .... .... 
- // msz<24:23> = 01 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | - // Zn<9:5> | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1SH_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | - ImmField<20, 16>(imm5)); -} - -// This prototype maps to 4 instruction encodings: -// LDFF1SW_z_p_bz_d_64_scaled -// LDFF1SW_z_p_bz_d_64_unscaled -// LDFF1SW_z_p_bz_d_x32_scaled -// LDFF1SW_z_p_bz_d_x32_unscaled -void Assembler::ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] - // 1100 0101 011. .... 101. .... .... .... - // msz<24:23> = 10 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> - // | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1SW_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); -} - -void Assembler::ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] - // 1100 0101 001. .... 101. .... .... .... - // msz<24:23> = 10 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | - // Zn<9:5> | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1SW_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | - ImmField<20, 16>(imm5)); -} - -// This prototype maps to 6 instruction encodings: -// LDFF1W_z_p_bz_d_64_scaled -// LDFF1W_z_p_bz_d_64_unscaled -// LDFF1W_z_p_bz_d_x32_scaled -// LDFF1W_z_p_bz_d_x32_unscaled -void Assembler::ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] - // 1100 0101 011. .... 111. .... .... .... 
- // msz<24:23> = 10 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> - // | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1W_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); -} - -// This prototype maps to 2 instruction encodings: -// LDFF1W_z_p_ai_d -// LDFF1W_z_p_ai_s -void Assembler::ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] - // 1100 0101 001. .... 111. .... .... .... - // msz<24:23> = 10 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | - // Zn<9:5> | Zt<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LDFF1W_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); -} - -void Assembler::SVEGatherPrefetchVectorPlusImmediateHelper( - PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size) { - VIXL_ASSERT(addr.IsVectorPlusImmediate()); - ZRegister zn = addr.GetVectorBase(); - VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); - - Instr op = 0xffffffff; - switch (prefetch_size) { - case kBRegSize: - op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_ai_s) - : static_cast<Instr>(PRFB_i_p_ai_d); - break; - case kHRegSize: - op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_ai_s) - : static_cast<Instr>(PRFH_i_p_ai_d); - break; - case kSRegSize: - op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFW_i_p_ai_s) - : static_cast<Instr>(PRFW_i_p_ai_d); - break; - case kDRegSize: - op = zn.IsLaneSizeS() ? 
static_cast<Instr>(PRFD_i_p_ai_s) - : static_cast<Instr>(PRFD_i_p_ai_d); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - int64_t imm5 = addr.GetImmediateOffset(); - Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | Rn(zn) | - ImmUnsignedField<20, 16>(imm5)); -} - -void Assembler::SVEGatherPrefetchScalarPlusImmediateHelper( - PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size) { - VIXL_ASSERT(addr.IsScalarPlusImmediate()); - int64_t imm6 = addr.GetImmediateOffset(); - - Instr op = 0xffffffff; - switch (prefetch_size) { - case kBRegSize: - op = PRFB_i_p_bi_s; - break; - case kHRegSize: - op = PRFH_i_p_bi_s; - break; - case kSRegSize: - op = PRFW_i_p_bi_s; - break; - case kDRegSize: - op = PRFD_i_p_bi_s; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | - RnSP(addr.GetScalarBase()) | ImmField<21, 16>(imm6)); -} - -void Assembler::SVEContiguousPrefetchScalarPlusScalarHelper( - PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size) { - VIXL_ASSERT(addr.IsScalarPlusScalar()); - Instr op = 0xffffffff; - - switch (prefetch_size) { - case kBRegSize: - VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER); - op = PRFB_i_p_br_s; - break; - case kHRegSize: - VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); - VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); - op = PRFH_i_p_br_s; - break; - case kSRegSize: - VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); - VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); - op = PRFW_i_p_br_s; - break; - case kDRegSize: - VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); - VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); - op = PRFD_i_p_br_s; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); - Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | - 
RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset())); -} - -void Assembler::SVEContiguousPrefetchScalarPlusVectorHelper( - PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size) { - VIXL_ASSERT(addr.IsScalarPlusVector()); - ZRegister zm = addr.GetVectorOffset(); - SVEOffsetModifier mod = addr.GetOffsetModifier(); - - // All prefetch scalar-plus-vector addressing modes use a shift corresponding - // to the element size. - switch (prefetch_size) { - case kBRegSize: - VIXL_ASSERT(addr.GetShiftAmount() == kBRegSizeInBytesLog2); - break; - case kHRegSize: - VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); - break; - case kSRegSize: - VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); - break; - case kDRegSize: - VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - Instr sx = 0; - Instr op = 0xffffffff; - if ((mod == NO_SVE_OFFSET_MODIFIER) || (mod == SVE_LSL)) { - VIXL_ASSERT(zm.IsLaneSizeD()); - - switch (prefetch_size) { - case kBRegSize: - VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); - op = PRFB_i_p_bz_d_64_scaled; - break; - case kHRegSize: - VIXL_ASSERT(mod == SVE_LSL); - op = PRFH_i_p_bz_d_64_scaled; - break; - case kSRegSize: - VIXL_ASSERT(mod == SVE_LSL); - op = PRFW_i_p_bz_d_64_scaled; - break; - case kDRegSize: - VIXL_ASSERT(mod == SVE_LSL); - op = PRFD_i_p_bz_d_64_scaled; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - } else { - VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); - VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); - - switch (prefetch_size) { - case kBRegSize: - op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_bz_s_x32_scaled) - : static_cast<Instr>(PRFB_i_p_bz_d_x32_scaled); - break; - case kHRegSize: - op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_bz_s_x32_scaled) - : static_cast<Instr>(PRFH_i_p_bz_d_x32_scaled); - break; - case kSRegSize: - op = zm.IsLaneSizeS() ? 
static_cast<Instr>(PRFW_i_p_bz_s_x32_scaled) - : static_cast<Instr>(PRFW_i_p_bz_d_x32_scaled); - break; - case kDRegSize: - op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFD_i_p_bz_s_x32_scaled) - : static_cast<Instr>(PRFD_i_p_bz_d_x32_scaled); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (mod == SVE_SXTW) { - sx = 1 << 22; - } - } - - Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | sx | - RnSP(addr.GetScalarBase()) | Rm(zm)); -} - -void Assembler::SVEPrefetchHelper(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr, - int prefetch_size) { - if (addr.IsVectorPlusImmediate()) { - // For example: - // [z0.s, #0] - SVEGatherPrefetchVectorPlusImmediateHelper(prfop, pg, addr, prefetch_size); - - } else if (addr.IsScalarPlusImmediate()) { - // For example: - // [x0, #42, mul vl] - SVEGatherPrefetchScalarPlusImmediateHelper(prfop, pg, addr, prefetch_size); - - } else if (addr.IsScalarPlusVector()) { - // For example: - // [x0, z0.s, sxtw] - SVEContiguousPrefetchScalarPlusVectorHelper(prfop, pg, addr, prefetch_size); - - } else if (addr.IsScalarPlusScalar()) { - // For example: - // [x0, x1] - SVEContiguousPrefetchScalarPlusScalarHelper(prfop, pg, addr, prefetch_size); - - } else { - VIXL_UNIMPLEMENTED(); - } -} - -void Assembler::prfb(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - SVEPrefetchHelper(prfop, pg, addr, kBRegSize); -} - -void Assembler::prfd(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - SVEPrefetchHelper(prfop, pg, addr, kDRegSize); -} - -void Assembler::prfh(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - SVEPrefetchHelper(prfop, pg, addr, kHRegSize); -} - -void Assembler::prfw(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand& addr) { - 
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - SVEPrefetchHelper(prfop, pg, addr, kSRegSize); -} - -void Assembler::SVELd1St1ScaImmHelper(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr, - Instr regoffset_op, - Instr immoffset_op, - int imm_divisor) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsScalarPlusScalar() || addr.IsScalarPlusImmediate()); - - Instr op; - if (addr.IsScalarPlusScalar()) { - op = regoffset_op | Rm(addr.GetScalarOffset()); - } else { - int64_t imm = addr.GetImmediateOffset(); - VIXL_ASSERT(((imm % imm_divisor) == 0) && IsInt4(imm / imm_divisor)); - op = immoffset_op | ImmField<19, 16>(imm / imm_divisor); - } - Emit(op | Rt(zt) | PgLow8(pg) | RnSP(addr.GetScalarBase())); -} - -void Assembler::ld1rqb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(0)); - VIXL_ASSERT(zt.IsLaneSizeB()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQB_z_p_br_contiguous, - LD1RQB_z_p_bi_u8, - 16); -} - -void Assembler::ld1rqd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(3)); - VIXL_ASSERT(zt.IsLaneSizeD()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQD_z_p_br_contiguous, - LD1RQD_z_p_bi_u64, - 16); -} - -void Assembler::ld1rqh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(1)); - VIXL_ASSERT(zt.IsLaneSizeH()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQH_z_p_br_contiguous, - LD1RQH_z_p_bi_u16, - 16); -} - -void Assembler::ld1rqw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(2)); - VIXL_ASSERT(zt.IsLaneSizeS()); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LD1RQW_z_p_br_contiguous, - LD1RQW_z_p_bi_u32, - 16); -} - -#define 
VIXL_DEFINE_LDFF1(MSZ, LANE_SIZE) \ - void Assembler::ldff1##MSZ(const ZRegister& zt, \ - const PRegisterZ& pg, \ - const SVEMemOperand& addr) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ - } -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LDFF1) - -#define VIXL_DEFINE_LDFF1S(MSZ, LANE_SIZE) \ - void Assembler::ldff1s##MSZ(const ZRegister& zt, \ - const PRegisterZ& pg, \ - const SVEMemOperand& addr) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ - } -VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LDFF1S) - -void Assembler::ldnf1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsPlainRegister() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - - SVELdSt1Helper(0, - zt, - pg, - addr, - /* is_signed = */ false, - SVEContiguousNonFaultLoad_ScalarPlusImmFixed); -} - -void Assembler::ldnf1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsPlainRegister() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - - SVELdSt1Helper(3, - zt, - pg, - addr, - /* is_signed = */ false, - SVEContiguousNonFaultLoad_ScalarPlusImmFixed); -} - -void Assembler::ldnf1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsPlainRegister() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - - SVELdSt1Helper(1, - zt, - pg, - addr, - /* is_signed = */ false, - SVEContiguousNonFaultLoad_ScalarPlusImmFixed); -} - -void Assembler::ldnf1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - 
VIXL_ASSERT(addr.IsPlainRegister() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - - SVELdSt1Helper(0, - zt, - pg, - addr, - /* is_signed = */ true, - SVEContiguousNonFaultLoad_ScalarPlusImmFixed); -} - -void Assembler::ldnf1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsPlainRegister() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - - SVELdSt1Helper(1, - zt, - pg, - addr, - /* is_signed = */ true, - SVEContiguousNonFaultLoad_ScalarPlusImmFixed); -} - -void Assembler::ldnf1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsPlainRegister() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - - SVELdSt1Helper(2, - zt, - pg, - addr, - /* is_signed = */ true, - SVEContiguousNonFaultLoad_ScalarPlusImmFixed); -} - -void Assembler::ldnf1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(addr.IsPlainRegister() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - - SVELdSt1Helper(2, - zt, - pg, - addr, - /* is_signed = */ false, - SVEContiguousNonFaultLoad_ScalarPlusImmFixed); -} - -void Assembler::ldnt1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1B_z_p_br_contiguous, - LDNT1B_z_p_bi_contiguous); -} - -void Assembler::ldnt1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && 
addr.IsEquivalentToLSL(3))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1D_z_p_br_contiguous, - LDNT1D_z_p_bi_contiguous); -} - -void Assembler::ldnt1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1H_z_p_br_contiguous, - LDNT1H_z_p_bi_contiguous); -} - -void Assembler::ldnt1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - LDNT1W_z_p_br_contiguous, - LDNT1W_z_p_bi_contiguous); -} - -Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2, - int num_regs, - const SVEMemOperand& addr, - bool is_load) { - VIXL_ASSERT((num_regs >= 1) && (num_regs <= 4)); - - Instr op = 0xfffffff; - if (addr.IsScalarPlusImmediate()) { - VIXL_ASSERT((addr.GetImmediateOffset() == 0) || addr.IsMulVl()); - int64_t imm = addr.GetImmediateOffset(); - VIXL_ASSERT((imm % num_regs) == 0); - op = RnSP(addr.GetScalarBase()) | ImmField<19, 16>(imm / num_regs); - - } else if (addr.IsScalarPlusScalar()) { - VIXL_ASSERT(addr.GetScalarOffset().IsZero() || - addr.IsEquivalentToLSL(msize_in_bytes_log2)); - op = RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset()); - - } else if (addr.IsVectorPlusImmediate()) { - ZRegister zn = addr.GetVectorBase(); - uint64_t imm = addr.GetImmediateOffset(); - VIXL_ASSERT(num_regs == 1); - VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); - VIXL_ASSERT(IsMultiple(imm, (1 << msize_in_bytes_log2))); - op = Rn(zn) | ImmUnsignedField<20, 16>(imm >> msize_in_bytes_log2); - - } else if (addr.IsScalarPlusVector()) { - // We have to support several different addressing modes. 
Some instructions - // support a subset of these, but the SVEMemOperand encoding is consistent. - Register xn = addr.GetScalarBase(); - ZRegister zm = addr.GetVectorOffset(); - SVEOffsetModifier mod = addr.GetOffsetModifier(); - Instr modifier_bit = 1 << (is_load ? 22 : 14); - Instr xs = (mod == SVE_SXTW) ? modifier_bit : 0; - VIXL_ASSERT(num_regs == 1); - - if (mod == SVE_LSL) { - // 64-bit scaled offset: [<Xn|SP>, <Zm>.D, LSL #<shift>] - VIXL_ASSERT(zm.IsLaneSizeD()); - VIXL_ASSERT(addr.GetShiftAmount() == msize_in_bytes_log2); - } else if (mod == NO_SVE_OFFSET_MODIFIER) { - // 64-bit unscaled offset: [<Xn|SP>, <Zm>.D] - VIXL_ASSERT(zm.IsLaneSizeD()); - VIXL_ASSERT(addr.GetShiftAmount() == 0); - } else { - // 32-bit scaled offset: [<Xn|SP>, <Zm>.S, <mod> #<shift>] - // 32-bit unscaled offset: [<Xn|SP>, <Zm>.S, <mod>] - // 32-bit unpacked scaled offset: [<Xn|SP>, <Zm>.D, <mod> #<shift>] - // 32-bit unpacked unscaled offset: [<Xn|SP>, <Zm>.D, <mod>] - VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); - VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); - VIXL_ASSERT((addr.GetShiftAmount() == 0) || - (addr.GetShiftAmount() == msize_in_bytes_log2)); - } - - // The form itself is encoded in the instruction opcode. - op = RnSP(xn) | Rm(zm) | xs; - } else { - VIXL_UNIMPLEMENTED(); - } - - return op; -} - -// SVEMemStore. - -void Assembler::SVESt1Helper(unsigned msize_in_bytes_log2, - const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - if (addr.IsScalarPlusScalar()) { - // Rm must not be xzr. 
- VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); - } - - if (addr.IsScatterGather()) { - bool is_load = false; - bool is_signed = false; - bool is_ff = false; - SVEScatterGatherHelper(msize_in_bytes_log2, - zt, - pg, - addr, - is_load, - is_signed, - is_ff); - return; - } - - Instr op; - if (addr.IsScalarPlusImmediate()) { - op = SVEContiguousStore_ScalarPlusImmFixed; - } else if (addr.IsScalarPlusScalar()) { - op = SVEContiguousStore_ScalarPlusScalarFixed; - } else { - VIXL_UNIMPLEMENTED(); - op = 0xffffffff; - } - SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, false, op); -} - -void Assembler::SVESt234Helper(int num_regs, - const ZRegister& zt1, - const PRegister& pg, - const SVEMemOperand& addr) { - if (addr.IsScalarPlusScalar()) { - // Rm must not be xzr. - VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); - } - - Instr op; - if (addr.IsScalarPlusImmediate()) { - op = SVEStoreMultipleStructures_ScalarPlusImmFixed; - } else if (addr.IsScalarPlusScalar()) { - op = SVEStoreMultipleStructures_ScalarPlusScalarFixed; - } else { - // These instructions don't support any other addressing modes. 
- VIXL_ABORT(); - } - SVELdSt234Helper(num_regs, zt1, pg, addr, op); -} - -#define VIXL_DEFINE_ST1(MSZ, LANE_SIZE) \ - void Assembler::st1##MSZ(const ZRegister& zt, \ - const PRegister& pg, \ - const SVEMemOperand& addr) { \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - SVESt1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr); \ - } -#define VIXL_DEFINE_ST2(MSZ, LANE_SIZE) \ - void Assembler::st2##MSZ(const ZRegister& zt1, \ - const ZRegister& zt2, \ - const PRegister& pg, \ - const SVEMemOperand& addr) { \ - USE(zt2); \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ - VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ - VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ - SVESt234Helper(2, zt1, pg, addr); \ - } -#define VIXL_DEFINE_ST3(MSZ, LANE_SIZE) \ - void Assembler::st3##MSZ(const ZRegister& zt1, \ - const ZRegister& zt2, \ - const ZRegister& zt3, \ - const PRegister& pg, \ - const SVEMemOperand& addr) { \ - USE(zt2, zt3); \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ - VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ - VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ - SVESt234Helper(3, zt1, pg, addr); \ - } -#define VIXL_DEFINE_ST4(MSZ, LANE_SIZE) \ - void Assembler::st4##MSZ(const ZRegister& zt1, \ - const ZRegister& zt2, \ - const ZRegister& zt3, \ - const ZRegister& zt4, \ - const PRegister& pg, \ - const SVEMemOperand& addr) { \ - USE(zt2, zt3, zt4); \ - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ - VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ - VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ - VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ - SVESt234Helper(4, zt1, pg, addr); \ - } - -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST1) -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST2) -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST3) -VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST4) - -void Assembler::stnt1b(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - 
VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1B_z_p_br_contiguous, - STNT1B_z_p_bi_contiguous); -} - -void Assembler::stnt1d(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1D_z_p_br_contiguous, - STNT1D_z_p_bi_contiguous); -} - -void Assembler::stnt1h(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1H_z_p_br_contiguous, - STNT1H_z_p_bi_contiguous); -} - -void Assembler::stnt1w(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || - (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); - SVELd1St1ScaImmHelper(zt, - pg, - addr, - STNT1W_z_p_br_contiguous, - STNT1W_z_p_bi_contiguous); -} - -void Assembler::str(const CPURegister& rt, const SVEMemOperand& addr) { - // STR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}] - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); - VIXL_ASSERT(addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && - (addr.GetOffsetModifier() == SVE_MUL_VL))); - int64_t imm9 = addr.GetImmediateOffset(); - VIXL_ASSERT(IsInt9(imm9)); - Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10; - Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16; - - Instr op = STR_z_bi; - if (rt.IsPRegister()) { - op = STR_p_bi; - } - Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l); -} - -// 
SVEMulIndex. - -void Assembler::sdot(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - - Instr op = 0xffffffff; - switch (zda.GetLaneSizeInBits()) { - case kSRegSize: - VIXL_ASSERT(IsUint2(index)); - op = SDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); - break; - case kDRegSize: - VIXL_ASSERT(IsUint1(index)); - op = SDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - Emit(op); -} - -void Assembler::udot(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); - VIXL_ASSERT(AreSameLaneSize(zn, zm)); - - Instr op = 0xffffffff; - switch (zda.GetLaneSizeInBits()) { - case kSRegSize: - VIXL_ASSERT(IsUint2(index)); - op = UDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); - break; - case kDRegSize: - VIXL_ASSERT(IsUint1(index)); - op = UDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - Emit(op); -} - -// SVEPartitionBreak. - -void Assembler::brka(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); - VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); - - Instr m = pg.IsMerging() ? 
0x00000010 : 0x00000000; - Emit(BRKA_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); -} - -void Assembler::brkas(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); - - Emit(BRKAS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); -} - -void Assembler::brkb(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); - VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); - - Instr m = pg.IsMerging() ? 0x00000010 : 0x00000000; - Emit(BRKB_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); -} - -void Assembler::brkbs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); - - Emit(BRKBS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); -} - -void Assembler::brkn(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - USE(pm); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); - VIXL_ASSERT(pd.Is(pm)); - - Emit(BRKN_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); -} - -void Assembler::brkns(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - USE(pm); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); - VIXL_ASSERT(pd.Is(pm)); - - Emit(BRKNS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); -} - -// SVEPermutePredicate. - -void Assembler::punpkhi(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn) { - // PUNPKHI <Pd>.H, <Pn>.B - // 0000 0101 0011 0001 0100 000. ...0 .... 
- // H<16> = 1 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.IsLaneSizeH()); - VIXL_ASSERT(pn.IsLaneSizeB()); - - Emit(PUNPKHI_p_p | Pd(pd) | Pn(pn)); -} - -void Assembler::punpklo(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn) { - // PUNPKLO <Pd>.H, <Pn>.B - // 0000 0101 0011 0000 0100 000. ...0 .... - // H<16> = 0 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.IsLaneSizeH()); - VIXL_ASSERT(pn.IsLaneSizeB()); - - Emit(PUNPKLO_p_p | Pd(pd) | Pn(pn)); -} - -void Assembler::rev(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn) { - // REV <Pd>.<T>, <Pn>.<T> - // 0000 0101 ..11 0100 0100 000. ...0 .... - // size<23:22> | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, pn)); - - Emit(REV_p_p | SVESize(pd) | Pd(pd) | Rx<8, 5>(pn)); -} - -void Assembler::trn1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> - // 0000 0101 ..10 .... 0101 000. ...0 .... - // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 0 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); - - Emit(TRN1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); -} - -void Assembler::trn2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> - // 0000 0101 ..10 .... 0101 010. ...0 .... - // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 1 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); - - Emit(TRN2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); -} - -void Assembler::uzp1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> - // 0000 0101 ..10 .... 0100 100. ...0 .... 
- // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 0 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); - - Emit(UZP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); -} - -void Assembler::uzp2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> - // 0000 0101 ..10 .... 0100 110. ...0 .... - // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 1 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); - - Emit(UZP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); -} - -void Assembler::zip1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> - // 0000 0101 ..10 .... 0100 000. ...0 .... - // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 0 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); - - Emit(ZIP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); -} - -void Assembler::zip2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> - // 0000 0101 ..10 .... 0100 010. ...0 .... - // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 1 | Pn<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); - - Emit(ZIP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); -} - -// SVEPermuteVectorExtract. - -void Assembler::ext(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned offset) { - // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> - // 0000 0101 001. .... 000. .... .... .... 
- // imm8h<20:16> | imm8l<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(IsUint8(offset)); - - int imm8h = ExtractUnsignedBitfield32(7, 3, offset); - int imm8l = ExtractUnsignedBitfield32(2, 0, offset); - Emit(EXT_z_zi_des | Rd(zd) | Rn(zm) | ImmUnsignedField<20, 16>(imm8h) | - ImmUnsignedField<12, 10>(imm8l)); -} - -// SVEPermuteVectorInterleaving. - -void Assembler::trn1(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // TRN1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0101 ..1. .... 0111 00.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(TRN1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::trn2(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // TRN2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0101 ..1. .... 0111 01.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(TRN2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::uzp1(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // UZP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0101 ..1. .... 0110 10.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(UZP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::uzp2(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // UZP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0101 ..1. .... 0110 11.. .... .... 
- // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(UZP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::zip1(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // ZIP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0101 ..1. .... 0110 00.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(ZIP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::zip2(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // ZIP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> - // 0000 0101 ..1. .... 0110 01.. .... .... - // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(ZIP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -// SVEPermuteVectorPredicated. - -void Assembler::clasta(const Register& rd, - const PRegister& pg, - const Register& rn, - const ZRegister& zm) { - // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> - // 0000 0101 ..11 0000 101. .... .... .... - // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Rdn<4:0> - - USE(rn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(rd.Is(rn)); - - Emit(CLASTA_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::clasta(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm) { - // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> - // 0000 0101 ..10 1010 100. .... .... .... 
- // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Vdn<4:0> - - USE(vn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.Is(vn)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(AreSameLaneSize(vd, zm)); - - Emit(CLASTA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::clasta(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> - // 0000 0101 ..10 1000 100. .... .... .... - // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(CLASTA_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::clastb(const Register& rd, - const PRegister& pg, - const Register& rn, - const ZRegister& zm) { - // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> - // 0000 0101 ..11 0001 101. .... .... .... - // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Rdn<4:0> - - USE(rn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(rd.Is(rn)); - - Emit(CLASTB_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::clastb(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm) { - // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> - // 0000 0101 ..10 1011 100. .... .... .... - // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Vdn<4:0> - - USE(vn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.Is(vn)); - VIXL_ASSERT(vd.IsScalar()); - VIXL_ASSERT(AreSameLaneSize(vd, zm)); - - Emit(CLASTB_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::clastb(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> - // 0000 0101 ..10 1001 100. .... .... .... 
- // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(CLASTB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -void Assembler::compact(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn) { - // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> - // 0000 0101 1.10 0001 100. .... .... .... - // sz<22> | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT((zd.GetLaneSizeInBits() == kSRegSize) || - (zd.GetLaneSizeInBits() == kDRegSize)); - - Instr sz = (zd.GetLaneSizeInBits() == kDRegSize) ? (1 << 22) : 0; - Emit(COMPACT_z_p_z | sz | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::cpy(const ZRegister& zd, - const PRegisterM& pg, - const Register& rn) { - // CPY <Zd>.<T>, <Pg>/M, <R><n|SP> - // 0000 0101 ..10 1000 101. .... .... .... - // size<23:22> | Pg<12:10> | Rn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= - zd.GetLaneSizeInBits()); - - Emit(CPY_z_p_r | SVESize(zd) | Rd(zd) | PgLow8(pg) | RnSP(rn)); -} - -void Assembler::cpy(const ZRegister& zd, - const PRegisterM& pg, - const VRegister& vn) { - // CPY <Zd>.<T>, <Pg>/M, <V><n> - // 0000 0101 ..10 0000 100. .... .... .... - // size<23:22> | Pg<12:10> | Vn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vn.IsScalar()); - VIXL_ASSERT(static_cast<unsigned>(vn.GetSizeInBits()) == - zd.GetLaneSizeInBits()); - - Emit(CPY_z_p_v | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(vn)); -} - -void Assembler::lasta(const Register& rd, - const PRegister& pg, - const ZRegister& zn) { - // LASTA <R><d>, <Pg>, <Zn>.<T> - // 0000 0101 ..10 0000 101. .... .... .... 
- // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Rd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LASTA_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::lasta(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - // LASTA <V><d>, <Pg>, <Zn>.<T> - // 0000 0101 ..10 0010 100. .... .... .... - // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Vd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(LASTA_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::lastb(const Register& rd, - const PRegister& pg, - const ZRegister& zn) { - // LASTB <R><d>, <Pg>, <Zn>.<T> - // 0000 0101 ..10 0001 101. .... .... .... - // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Rd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(LASTB_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::lastb(const VRegister& vd, - const PRegister& pg, - const ZRegister& zn) { - // LASTB <V><d>, <Pg>, <Zn>.<T> - // 0000 0101 ..10 0011 100. .... .... .... - // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Vd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vd.IsScalar()); - - Emit(LASTB_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::rbit(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0101 ..10 0111 100. .... .... .... - // size<23:22> | opc<17:16> = 11 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - - Emit(RBIT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::revb(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0101 ..10 0100 100. .... .... .... 
- // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.IsLaneSizeH() || zd.IsLaneSizeS() || zd.IsLaneSizeD()); - - Emit(REVB_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::revh(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> - // 0000 0101 ..10 0101 100. .... .... .... - // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); - - Emit(REVH_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::revw(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - // REVW <Zd>.D, <Pg>/M, <Zn>.D - // 0000 0101 ..10 0110 100. .... .... .... - // size<23:22> | opc<17:16> = 10 | Pg<12:10> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - VIXL_ASSERT(zd.IsLaneSizeD()); - - Emit(REVW_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); -} - -void Assembler::splice(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> - // 0000 0101 ..10 1100 100. .... .... .... - // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0> - - USE(zn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.Is(zn)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); -} - -// SVEPermuteVectorUnpredicated. - -void Assembler::dup(const ZRegister& zd, const Register& xn) { - // DUP <Zd>.<T>, <R><n|SP> - // 0000 0101 ..10 0000 0011 10.. .... .... 
- // size<23:22> | Rn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(DUP_z_r | SVESize(zd) | Rd(zd) | RnSP(xn)); -} - -void Assembler::dup(const ZRegister& zd, const ZRegister& zn, unsigned index) { - // DUP <Zd>.<T>, <Zn>.<T>[<imm>] - // 0000 0101 ..1. .... 0010 00.. .... .... - // imm2<23:22> | tsz<20:16> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(zd, zn)); - VIXL_ASSERT((index * zd.GetLaneSizeInBits()) < 512); - int n = zd.GetLaneSizeInBytesLog2(); - unsigned imm_7 = (index << (n + 1)) | (1 << n); - VIXL_ASSERT(IsUint7(imm_7)); - unsigned imm_2 = ExtractUnsignedBitfield32(6, 5, imm_7); - unsigned tsz_5 = ExtractUnsignedBitfield32(4, 0, imm_7); - - Emit(DUP_z_zi | ImmUnsignedField<23, 22>(imm_2) | - ImmUnsignedField<20, 16>(tsz_5) | Rd(zd) | Rn(zn)); -} - -void Assembler::insr(const ZRegister& zdn, const Register& rm) { - // INSR <Zdn>.<T>, <R><m> - // 0000 0101 ..10 0100 0011 10.. .... .... - // size<23:22> | Rm<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(INSR_z_r | SVESize(zdn) | Rd(zdn) | Rn(rm)); -} - -void Assembler::insr(const ZRegister& zdn, const VRegister& vm) { - // INSR <Zdn>.<T>, <V><m> - // 0000 0101 ..11 0100 0011 10.. .... .... - // size<23:22> | Vm<9:5> | Zdn<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(vm.IsScalar()); - - Emit(INSR_z_v | SVESize(zdn) | Rd(zdn) | Rn(vm)); -} - -void Assembler::rev(const ZRegister& zd, const ZRegister& zn) { - // REV <Zd>.<T>, <Zn>.<T> - // 0000 0101 ..11 1000 0011 10.. .... .... - // size<23:22> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(zd, zn)); - - Emit(REV_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -void Assembler::sunpkhi(const ZRegister& zd, const ZRegister& zn) { - // SUNPKHI <Zd>.<T>, <Zn>.<Tb> - // 0000 0101 ..11 0001 0011 10.. .... .... 
- // size<23:22> | U<17> = 0 | H<16> = 1 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); - VIXL_ASSERT(!zd.IsLaneSizeB()); - - Emit(SUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -void Assembler::sunpklo(const ZRegister& zd, const ZRegister& zn) { - // SUNPKLO <Zd>.<T>, <Zn>.<Tb> - // 0000 0101 ..11 0000 0011 10.. .... .... - // size<23:22> | U<17> = 0 | H<16> = 0 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); - VIXL_ASSERT(!zd.IsLaneSizeB()); - - Emit(SUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -void Assembler::tbl(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm) { - // TBL <Zd>.<T>, { <Zn>.<T> }, <Zm>.<T> - // 0000 0101 ..1. .... 0011 00.. .... .... - // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(zd, zn, zm)); - - Emit(TBL_z_zz_1 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); -} - -void Assembler::uunpkhi(const ZRegister& zd, const ZRegister& zn) { - // UUNPKHI <Zd>.<T>, <Zn>.<Tb> - // 0000 0101 ..11 0011 0011 10.. .... .... - // size<23:22> | U<17> = 1 | H<16> = 1 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); - VIXL_ASSERT(!zd.IsLaneSizeB()); - - Emit(UUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -void Assembler::uunpklo(const ZRegister& zd, const ZRegister& zn) { - // UUNPKLO <Zd>.<T>, <Zn>.<Tb> - // 0000 0101 ..11 0010 0011 10.. .... .... - // size<23:22> | U<17> = 1 | H<16> = 0 | Zn<9:5> | Zd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); - VIXL_ASSERT(!zd.IsLaneSizeB()); - - Emit(UUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); -} - -// SVEPredicateCount. 
- -void Assembler::cntp(const Register& xd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - // CNTP <Xd>, <Pg>, <Pn>.<T> - // 0010 0101 ..10 0000 10.. ..0. .... .... - // size<23:22> | opc<18:16> = 000 | Pg<13:10> | o2<9> = 0 | Pn<8:5> | Rd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xd.IsX()); - VIXL_ASSERT(pg.IsUnqualified()); - if (pg.HasLaneSize()) VIXL_ASSERT(AreSameFormat(pg, pn)); - - Emit(CNTP_r_p_p | SVESize(pn) | Rd(xd) | Pg<13, 10>(pg) | Pn(pn)); -} - -// SVEPredicateLogicalOp. -void Assembler::and_(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(AND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::ands(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(ANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::bic(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(BIC_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::bics(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(BICS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::eor(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - 
const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(EOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::eors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(EORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::nand(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(NAND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::nands(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(NANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::nor(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(NOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::nors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(NORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::orn(const PRegisterWithLaneSize& pd, - const PRegisterZ& 
pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(ORN_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::orns(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(ORNS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::orr(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(ORR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::orrs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameFormat(pd, pn, pm)); - VIXL_ASSERT(pd.IsLaneSizeB()); - Emit(ORRS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::sel(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - Emit(SEL_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -// SVEPredicateMisc. - -void Assembler::pfalse(const PRegisterWithLaneSize& pd) { - // PFALSE <Pd>.B - // 0010 0101 0001 1000 1110 0100 0000 .... - // op<23> = 0 | S<22> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - // Ignore the lane size, since it makes no difference to the operation. 
- - Emit(PFALSE_p | Pd(pd)); -} - -void Assembler::pfirst(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - // PFIRST <Pdn>.B, <Pg>, <Pdn>.B - // 0010 0101 0101 1000 1100 000. ...0 .... - // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pdn<3:0> - - USE(pn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.Is(pn)); - VIXL_ASSERT(pd.IsLaneSizeB()); - - Emit(PFIRST_p_p_p | Pd(pd) | Pg<8, 5>(pg)); -} - -void Assembler::pnext(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - // PNEXT <Pdn>.<T>, <Pg>, <Pdn>.<T> - // 0010 0101 ..01 1001 1100 010. ...0 .... - // size<23:22> | Pg<8:5> | Pdn<3:0> - - USE(pn); - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pd.Is(pn)); - - Emit(PNEXT_p_p_p | SVESize(pd) | Pd(pd) | Pg<8, 5>(pg)); -} - -void Assembler::ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) { - // PTEST <Pg>, <Pn>.B - // 0010 0101 0101 0000 11.. ..0. ...0 0000 - // op<23> = 0 | S<22> = 1 | Pg<13:10> | Pn<8:5> | opc2<3:0> = 0000 - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(pn.IsLaneSizeB()); - - Emit(PTEST_p_p | Pg<13, 10>(pg) | Rx<8, 5>(pn)); -} - -void Assembler::ptrue(const PRegisterWithLaneSize& pd, int pattern) { - // PTRUE <Pd>.<T>{, <pattern>} - // 0010 0101 ..01 1000 1110 00.. ...0 .... - // size<23:22> | S<16> = 0 | pattern<9:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(PTRUE_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); -} - -void Assembler::ptrues(const PRegisterWithLaneSize& pd, int pattern) { - // PTRUES <Pd>.<T>{, <pattern>} - // 0010 0101 ..01 1001 1110 00.. ...0 .... - // size<23:22> | S<16> = 1 | pattern<9:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(PTRUES_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); -} - -void Assembler::rdffr(const PRegisterWithLaneSize& pd) { - // RDFFR <Pd>.B - // 0010 0101 0001 1001 1111 0000 0000 .... 
- // op<23> = 0 | S<22> = 0 | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(RDFFR_p_f | Pd(pd)); -} - -void Assembler::rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { - // RDFFR <Pd>.B, <Pg>/Z - // 0010 0101 0001 1000 1111 000. ...0 .... - // op<23> = 0 | S<22> = 0 | Pg<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(RDFFR_p_p_f | Pd(pd) | Pg<8, 5>(pg)); -} - -void Assembler::rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { - // RDFFRS <Pd>.B, <Pg>/Z - // 0010 0101 0101 1000 1111 000. ...0 .... - // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(RDFFRS_p_p_f | Pd(pd) | Pg<8, 5>(pg)); -} - -// SVEPropagateBreak. - -void Assembler::brkpa(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B - // 0010 0101 0000 .... 11.. ..0. ...0 .... - // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(BRKPA_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::brkpas(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B - // 0010 0101 0100 .... 11.. ..0. ...0 .... - // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(BRKPAS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::brkpb(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B - // 0010 0101 0000 .... 11.. ..0. ...1 .... 
- // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(BRKPB_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -void Assembler::brkpbs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B - // 0010 0101 0100 .... 11.. ..0. ...1 .... - // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | - // Pd<3:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(BRKPBS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); -} - -// SVEStackFrameAdjustment. - -void Assembler::addpl(const Register& xd, const Register& xn, int imm6) { - // ADDPL <Xd|SP>, <Xn|SP>, #<imm> - // 0000 0100 011. .... 0101 0... .... .... - // op<22> = 1 | Rn<20:16> | imm6<10:5> | Rd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xd.IsX()); - VIXL_ASSERT(xn.IsX()); - - Emit(ADDPL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); -} - -void Assembler::addvl(const Register& xd, const Register& xn, int imm6) { - // ADDVL <Xd|SP>, <Xn|SP>, #<imm> - // 0000 0100 001. .... 0101 0... .... .... - // op<22> = 0 | Rn<20:16> | imm6<10:5> | Rd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xd.IsX()); - VIXL_ASSERT(xn.IsX()); - - Emit(ADDVL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); -} - -// SVEStackFrameSize. - -void Assembler::rdvl(const Register& xd, int imm6) { - // RDVL <Xd>, #<imm> - // 0000 0100 1011 1111 0101 0... .... .... - // op<22> = 0 | opc2<20:16> = 11111 | imm6<10:5> | Rd<4:0> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(xd.IsX()); - - Emit(RDVL_r_i | Rd(xd) | ImmField<10, 5>(imm6)); -} - -// SVEVectorSelect. 
- -void Assembler::sel(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - - Emit(SEL_z_p_zz | SVESize(zd) | Rd(zd) | Pg<13, 10>(pg) | Rn(zn) | Rm(zm)); -} - -// SVEWriteFFR. - -void Assembler::setffr() { - // SETFFR - // 0010 0101 0010 1100 1001 0000 0000 0000 - // opc<23:22> = 00 - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(SETFFR_f); -} - -void Assembler::wrffr(const PRegisterWithLaneSize& pn) { - // WRFFR <Pn>.B - // 0010 0101 0010 1000 1001 000. ...0 0000 - // opc<23:22> = 00 | Pn<8:5> - - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - - Emit(WRFFR_f_p | Rx<8, 5>(pn)); -} - -// Aliases. - -void Assembler::bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - and_(zd, zn, ~imm); -} - -void Assembler::eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - eor(zd, zn, ~imm); -} - -void Assembler::orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - orr(zd, zn, ~imm); -} - - -void Assembler::fmov(const ZRegister& zd, const PRegisterM& pg, double imm) { - if (IsPositiveZero(imm)) { - cpy(zd, pg, 0); - } else { - fcpy(zd, pg, imm); - } -} - -void Assembler::fmov(const ZRegister& zd, double imm) { - if (IsPositiveZero(imm)) { - dup(zd, imm); - } else { - fdup(zd, imm); - } -} - -void Assembler::mov(const PRegister& pd, const PRegister& pn) { - // If the inputs carry a lane size, they must match. 
- VIXL_ASSERT((!pd.HasLaneSize() && !pn.HasLaneSize()) || - AreSameLaneSize(pd, pn)); - orr(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); -} - -void Assembler::mov(const PRegisterWithLaneSize& pd, - const PRegisterM& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - sel(pd, pg, pn, pd); -} - -void Assembler::mov(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - and_(pd, pg, pn, pn); -} - -void Assembler::mov(const ZRegister& zd, - const PRegister& pg, - int imm8, - int shift) { - VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); - cpy(zd, pg, imm8, shift); -} - -void Assembler::mov(const ZRegister& zd, const Register& xn) { dup(zd, xn); } - -void Assembler::mov(const ZRegister& zd, const VRegister& vn) { - VIXL_ASSERT(vn.IsScalar()); - VIXL_ASSERT(AreSameLaneSize(zd, vn)); - dup(zd, vn.Z().WithSameLaneSizeAs(vn), 0); -} - -void Assembler::mov(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - orr(zd.VnD(), zn.VnD(), zn.VnD()); -} - -void Assembler::mov(const ZRegister& zd, const ZRegister& zn, unsigned index) { - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - dup(zd, zn, index); -} - -void Assembler::mov(const ZRegister& zd, - const PRegisterM& pg, - const Register& rn) { - cpy(zd, pg, rn); -} - -void Assembler::mov(const ZRegister& zd, - const PRegisterM& pg, - const VRegister& vn) { - VIXL_ASSERT(vn.IsScalar()); - VIXL_ASSERT(AreSameLaneSize(zd, vn)); - cpy(zd, pg, vn); -} - -void Assembler::mov(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn) { - VIXL_ASSERT(AreSameLaneSize(zd, zn)); - sel(zd, pg, zn, zd); -} - -void Assembler::mov(const ZRegister& zd, uint64_t imm) { - // Mov is an alias of dupm for certain values of imm. Whilst this matters in - // the disassembler, for the assembler, we don't distinguish between the - // two mnemonics, and simply call dupm. 
- dupm(zd, imm); -} - -void Assembler::mov(const ZRegister& zd, int imm8, int shift) { - dup(zd, imm8, shift); -} - -void Assembler::movs(const PRegister& pd, const PRegister& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - orrs(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); -} - -void Assembler::movs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - ands(pd, pg, pn, pn); -} - -void Assembler::not_(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - eor(pd, pg, pn, pg.VnB()); -} - -void Assembler::nots(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); - eors(pd, pg, pn, pg.VnB()); -} - -} // namespace aarch64 -} // namespace vixl diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h index bf93918d..36f5568d 100644 --- a/src/aarch64/constants-aarch64.h +++ b/src/aarch64/constants-aarch64.h @@ -34,8 +34,6 @@ namespace aarch64 { const unsigned kNumberOfRegisters = 32; const unsigned kNumberOfVRegisters = 32; -const unsigned kNumberOfZRegisters = kNumberOfVRegisters; -const unsigned kNumberOfPRegisters = 16; // Callee saved registers are x21-x30(lr). const int kNumberOfCalleeSavedRegisters = 10; const int kFirstCalleeSavedRegisterIndex = 21; @@ -43,34 +41,14 @@ const int kFirstCalleeSavedRegisterIndex = 21; // still caller-saved. const int kNumberOfCalleeSavedFPRegisters = 8; const int kFirstCalleeSavedFPRegisterIndex = 8; -// All predicated instructions accept at least p0-p7 as the governing predicate. 
-const unsigned kNumberOfGoverningPRegisters = 8; // clang-format off -#define AARCH64_P_REGISTER_CODE_LIST(R) \ - R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ - R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) - #define AARCH64_REGISTER_CODE_LIST(R) \ R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \ R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \ R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31) -// SVE loads and stores use "w" instead of "s" for word-sized accesses, so the -// mapping from the load/store variant to constants like k*RegSize is irregular. -#define VIXL_SVE_LOAD_STORE_VARIANT_LIST(V) \ - V(b, B) \ - V(h, H) \ - V(w, S) \ - V(d, D) - -// Sign-extending loads don't have double-word variants. -#define VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(V) \ - V(b, B) \ - V(h, H) \ - V(w, S) - #define INSTRUCTION_FIELDS_LIST(V_) \ /* Register fields */ \ V_(Rd, 4, 0, ExtractBits) /* Destination register. */ \ @@ -81,11 +59,6 @@ V_(Ra, 14, 10, ExtractBits) /* Third source register. */ \ V_(Rt, 4, 0, ExtractBits) /* Load/store register. */ \ V_(Rt2, 14, 10, ExtractBits) /* Load/store second register. */ \ V_(Rs, 20, 16, ExtractBits) /* Exclusive access status. */ \ -V_(Pt, 3, 0, ExtractBits) /* Load/store register (p0-p7). */ \ -V_(Pd, 3, 0, ExtractBits) /* SVE destination predicate register. */ \ -V_(Pn, 8, 5, ExtractBits) /* SVE first source predicate register. */ \ -V_(Pm, 19, 16, ExtractBits) /* SVE second source predicate register.*/ \ -V_(PgLow8, 12, 10, ExtractBits) /* Governing predicate (p0-p7). 
*/ \ \ /* Common bits */ \ V_(SixtyFourBits, 31, 31, ExtractBits) \ @@ -101,7 +74,7 @@ V_(ImmDPShift, 15, 10, ExtractBits) \ \ /* Add/subtract immediate */ \ V_(ImmAddSub, 21, 10, ExtractBits) \ -V_(ImmAddSubShift, 22, 22, ExtractBits) \ +V_(ShiftAddSub, 23, 22, ExtractBits) \ \ /* Add/substract extend */ \ V_(ImmExtendShift, 12, 10, ExtractBits) \ @@ -204,23 +177,7 @@ V_(NEONCmode, 15, 12, ExtractBits) \ /* NEON Shift Immediate fields */ \ V_(ImmNEONImmhImmb, 22, 16, ExtractBits) \ V_(ImmNEONImmh, 22, 19, ExtractBits) \ -V_(ImmNEONImmb, 18, 16, ExtractBits) \ - \ -/* SVE generic fields */ \ -V_(SVESize, 23, 22, ExtractBits) \ -V_(ImmSVEVLScale, 10, 5, ExtractSignedBits) \ -V_(ImmSVEIntWideSigned, 12, 5, ExtractSignedBits) \ -V_(ImmSVEIntWideUnsigned, 12, 5, ExtractBits) \ -V_(ImmSVEPredicateConstraint, 9, 5, ExtractBits) \ - \ -/* SVE Bitwise Immediate bitfield */ \ -V_(SVEBitN, 17, 17, ExtractBits) \ -V_(SVEImmRotate, 16, 11, ExtractBits) \ -V_(SVEImmSetBits, 10, 5, ExtractBits) \ - \ -V_(SVEImmPrefetchOperation, 3, 0, ExtractBits) \ -V_(SVEPrefetchHint, 3, 3, ExtractBits) - +V_(ImmNEONImmb, 18, 16, ExtractBits) // clang-format on #define SYSTEM_REGISTER_FIELDS_LIST(V_, M_) \ @@ -278,22 +235,7 @@ enum Condition { // Aliases. hs = cs, // C set Unsigned higher or same. - lo = cc, // C clear Unsigned lower. - - // Floating-point additional condition code. - uo, // Unordered comparison. - - // SVE predicate condition aliases. - sve_none = eq, // No active elements were true. - sve_any = ne, // An active element was true. - sve_nlast = cs, // The last element was not true. - sve_last = cc, // The last element was true. - sve_first = mi, // The first element was true. - sve_nfrst = pl, // The first element was not true. - sve_pmore = hi, // An active element was true but not the last element. - sve_plast = ls, // The last active element was true or no active elements were true. - sve_tcont = ge, // CTERM termination condition not deleted. 
- sve_tstop = lt // CTERM termination condition deleted. + lo = cc // C clear Unsigned lower. }; inline Condition InvertCondition(Condition cond) { @@ -337,12 +279,7 @@ enum StatusFlags { FPEqualFlag = ZCFlag, FPLessThanFlag = NFlag, FPGreaterThanFlag = CFlag, - FPUnorderedFlag = CVFlag, - - // SVE condition flags. - SVEFirstFlag = NFlag, - SVENoneFlag = ZFlag, - SVENotLastFlag = CFlag + FPUnorderedFlag = CVFlag }; enum Shift { @@ -366,17 +303,6 @@ enum Extend { SXTX = 7 }; -enum SVEOffsetModifier { - NO_SVE_OFFSET_MODIFIER, - // Multiply (each element of) the offset by either the vector or predicate - // length, according to the context. - SVE_MUL_VL, - // Shift or extend modifiers (as in `Shift` or `Extend`). - SVE_LSL, - SVE_UXTW, - SVE_SXTW -}; - enum SystemHint { NOP = 0, YIELD = 1, @@ -442,12 +368,6 @@ enum PrefetchOperation { PSTL3STRM = 0x15 }; -constexpr bool IsNamedPrefetchOperation(int op) { - return ((op >= PLDL1KEEP) && (op <= PLDL3STRM)) || - ((op >= PLIL1KEEP) && (op <= PLIL3STRM)) || - ((op >= PSTL1KEEP) && (op <= PSTL3STRM)); -} - enum BType { // Set when executing any instruction on a guarded page, except those cases // listed below. @@ -509,36 +429,6 @@ enum DataCacheOp { ZVA = CacheOpEncoder<3, 7, 4, 1>::value }; -// Some SVE instructions support a predicate constraint pattern. This is -// interpreted as a VL-dependent value, and is typically used to initialise -// predicates, or to otherwise limit the number of processed elements. -enum SVEPredicateConstraint { - // Select 2^N elements, for the largest possible N. - SVE_POW2 = 0x0, - // Each VL<N> selects exactly N elements if possible, or zero if N is greater - // than the number of elements. Note that the encoding values for VL<N> are - // not linearly related to N. 
- SVE_VL1 = 0x1, - SVE_VL2 = 0x2, - SVE_VL3 = 0x3, - SVE_VL4 = 0x4, - SVE_VL5 = 0x5, - SVE_VL6 = 0x6, - SVE_VL7 = 0x7, - SVE_VL8 = 0x8, - SVE_VL16 = 0x9, - SVE_VL32 = 0xa, - SVE_VL64 = 0xb, - SVE_VL128 = 0xc, - SVE_VL256 = 0xd, - // Each MUL<N> selects the largest multiple of N elements that the vector - // length supports. Note that for D-sized lanes, this can be zero. - SVE_MUL4 = 0x1d, - SVE_MUL3 = 0x1e, - // Select all elements. - SVE_ALL = 0x1f -}; - // Instruction enumerations. // // These are the masks that define a class of instructions, and the list of @@ -613,14 +503,6 @@ enum NEONScalarFormatField { NEON_D = 0x00C00000 }; -enum SVESizeField { - SVESizeFieldMask = 0x00C00000, - SVE_B = 0x00000000, - SVE_H = 0x00400000, - SVE_S = 0x00800000, - SVE_D = 0x00C00000 -}; - // PC relative addressing. enum PCRelAddressingOp { PCRelAddressingFixed = 0x10000000, @@ -649,8 +531,8 @@ enum AddSubOp { enum AddSubImmediateOp { AddSubImmediateFixed = 0x11000000, - AddSubImmediateFMask = 0x1F800000, - AddSubImmediateMask = 0xFF800000, + AddSubImmediateFMask = 0x1F000000, + AddSubImmediateMask = 0xFF000000, #define ADD_SUB_IMMEDIATE(A) \ A##_w_imm = AddSubImmediateFixed | A, \ A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits @@ -2778,1626 +2660,11 @@ enum NEONScalarShiftImmediateOp { NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm }; -enum SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsOp { - SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed = 0x84A00000, - SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000, - SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000, - LD1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed, - LDFF1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00002000, - LD1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, - 
LDFF1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 -}; - -enum SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsOp { - SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed = 0x85200000, - SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000, - SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000, - LD1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, - LDFF1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 -}; - -enum SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsOp { - SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed = 0x84000000, - SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE208000, - SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask = 0xFFA0E000, - LD1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed, - LDFF1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00002000, - LD1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00004000, - LDFF1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00006000, - LD1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000, - LDFF1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00802000, - LD1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00804000, - LDFF1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00806000, - LD1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01004000, - LDFF1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01006000 -}; - -enum SVE32BitGatherLoad_VectorPlusImmOp { - SVE32BitGatherLoad_VectorPlusImmFixed = 0x84208000, - 
SVE32BitGatherLoad_VectorPlusImmFMask = 0xFE608000, - SVE32BitGatherLoad_VectorPlusImmMask = 0xFFE0E000, - LD1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed, - LDFF1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00002000, - LD1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00004000, - LDFF1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00006000, - LD1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00800000, - LDFF1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00802000, - LD1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00804000, - LDFF1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00806000, - LD1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01004000, - LDFF1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01006000 -}; - -enum SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsOp { - SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed = 0x84200000, - SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08010, - SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E010, - PRFB_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed, - PRFH_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00002000, - PRFW_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, - PRFD_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 -}; - -enum SVE32BitGatherPrefetch_VectorPlusImmOp { - SVE32BitGatherPrefetch_VectorPlusImmFixed = 0x8400E000, - SVE32BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010, - SVE32BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010, - PRFB_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed, - PRFH_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x00800000, - PRFW_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01000000, - PRFD_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01800000 -}; - -enum 
SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsOp { - SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed = 0xE4608000, - SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFMask = 0xFE60A000, - SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask = 0xFFE0A000, - ST1H_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x00800000, - ST1W_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x01000000 -}; - -enum SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsOp { - SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed = 0xE4408000, - SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE60A000, - SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask = 0xFFE0A000, - ST1B_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed, - ST1H_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000, - ST1W_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x01000000 -}; - -enum SVE32BitScatterStore_VectorPlusImmOp { - SVE32BitScatterStore_VectorPlusImmFixed = 0xE460A000, - SVE32BitScatterStore_VectorPlusImmFMask = 0xFE60E000, - SVE32BitScatterStore_VectorPlusImmMask = 0xFFE0E000, - ST1B_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed, - ST1H_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x00800000, - ST1W_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x01000000 -}; - -enum SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsOp { - SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed = 0xC4200000, - SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFMask = 0xFE208000, - SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask = 0xFFA0E000, - LD1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00800000, - LDFF1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00802000, - LD1H_z_p_bz_d_x32_scaled 
= SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00804000, - LDFF1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00806000, - LD1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01000000, - LDFF1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01002000, - LD1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01004000, - LDFF1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01006000, - LD1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01804000, - LDFF1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01806000 -}; - -enum SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsOp { - SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000, - SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFMask = 0xFE608000, - SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000, - LD1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00800000, - LDFF1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00802000, - LD1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00804000, - LDFF1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00806000, - LD1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01000000, - LDFF1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01002000, - LD1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01004000, - LDFF1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01006000, - LD1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01804000, - LDFF1D_z_p_bz_d_64_scaled = 
SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01806000 -}; - -enum SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsOp { - SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed = 0xC4408000, - SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE608000, - SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000, - LD1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed, - LDFF1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00002000, - LD1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00004000, - LDFF1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00006000, - LD1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000, - LDFF1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00802000, - LD1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00804000, - LDFF1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00806000, - LD1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000, - LDFF1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01002000, - LD1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01004000, - LDFF1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01006000, - LD1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01804000, - LDFF1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01806000 -}; - -enum SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsOp { - SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xC4000000, - SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE208000, - 
SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFA0E000, - LD1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed, - LDFF1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00002000, - LD1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00004000, - LDFF1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00006000, - LD1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000, - LDFF1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00802000, - LD1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00804000, - LDFF1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00806000, - LD1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000, - LDFF1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01002000, - LD1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01004000, - LDFF1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01006000, - LD1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01804000, - LDFF1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01806000 -}; - -enum SVE64BitGatherLoad_VectorPlusImmOp { - SVE64BitGatherLoad_VectorPlusImmFixed = 0xC4208000, - SVE64BitGatherLoad_VectorPlusImmFMask = 0xFE608000, - SVE64BitGatherLoad_VectorPlusImmMask = 0xFFE0E000, - LD1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed, - LDFF1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00002000, - LD1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 
0x00004000, - LDFF1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00006000, - LD1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00800000, - LDFF1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00802000, - LD1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00804000, - LDFF1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00806000, - LD1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01000000, - LDFF1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01002000, - LD1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01004000, - LDFF1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01006000, - LD1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01804000, - LDFF1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01806000 -}; - -enum SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsOp { - SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000, - SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFMask = 0xFFE08010, - SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E010, - PRFB_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed, - PRFH_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00002000, - PRFW_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00004000, - PRFD_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00006000 -}; - -enum SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsOp { - SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xC4200000, - SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFFA08010, - SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFA0E010, - PRFB_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed, - PRFH_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00002000, - 
PRFW_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00004000, - PRFD_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00006000 -}; - -enum SVE64BitGatherPrefetch_VectorPlusImmOp { - SVE64BitGatherPrefetch_VectorPlusImmFixed = 0xC400E000, - SVE64BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010, - SVE64BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010, - PRFB_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed, - PRFH_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x00800000, - PRFW_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01000000, - PRFD_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01800000 -}; - -enum SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsOp { - SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed = 0xE420A000, - SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFMask = 0xFE60E000, - SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000, - ST1H_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x00800000, - ST1W_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01000000, - ST1D_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01800000 -}; - -enum SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsOp { - SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed = 0xE400A000, - SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE60E000, - SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000, - ST1B_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed, - ST1H_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000, - ST1W_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000, - ST1D_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01800000 -}; - -enum 
SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsOp { - SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xE4208000, - SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFE60A000, - SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFE0A000, - ST1H_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00800000, - ST1W_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01000000, - ST1D_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01800000 -}; - -enum SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsOp { - SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xE4008000, - SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE60A000, - SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFE0A000, - ST1B_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed, - ST1H_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000, - ST1W_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000, - ST1D_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01800000 -}; - -enum SVE64BitScatterStore_VectorPlusImmOp { - SVE64BitScatterStore_VectorPlusImmFixed = 0xE440A000, - SVE64BitScatterStore_VectorPlusImmFMask = 0xFE60E000, - SVE64BitScatterStore_VectorPlusImmMask = 0xFFE0E000, - ST1B_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed, - ST1H_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x00800000, - ST1W_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01000000, - ST1D_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01800000 -}; - -enum SVEAddressGenerationOp { - SVEAddressGenerationFixed = 0x0420A000, - SVEAddressGenerationFMask = 0xFF20F000, - SVEAddressGenerationMask 
= 0xFFE0F000, - ADR_z_az_d_s32_scaled = SVEAddressGenerationFixed, - ADR_z_az_d_u32_scaled = SVEAddressGenerationFixed | 0x00400000, - ADR_z_az_s_same_scaled = SVEAddressGenerationFixed | 0x00800000, - ADR_z_az_d_same_scaled = SVEAddressGenerationFixed | 0x00C00000 -}; - -enum SVEBitwiseLogicalUnpredicatedOp { - SVEBitwiseLogicalUnpredicatedFixed = 0x04202000, - SVEBitwiseLogicalUnpredicatedFMask = 0xFF20E000, - SVEBitwiseLogicalUnpredicatedMask = 0xFFE0FC00, - AND_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00001000, - ORR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00401000, - EOR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00801000, - BIC_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00C01000 -}; - -enum SVEBitwiseLogicalWithImm_UnpredicatedOp { - SVEBitwiseLogicalWithImm_UnpredicatedFixed = 0x05000000, - SVEBitwiseLogicalWithImm_UnpredicatedFMask = 0xFF3C0000, - SVEBitwiseLogicalWithImm_UnpredicatedMask = 0xFFFC0000, - ORR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed, - EOR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00400000, - AND_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00800000 -}; - -enum SVEBitwiseLogical_PredicatedOp { - SVEBitwiseLogical_PredicatedFixed = 0x04180000, - SVEBitwiseLogical_PredicatedFMask = 0xFF38E000, - SVEBitwiseLogical_PredicatedMask = 0xFF3FE000, - ORR_z_p_zz = SVEBitwiseLogical_PredicatedFixed, - EOR_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00010000, - AND_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00020000, - BIC_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00030000 -}; - -enum SVEBitwiseShiftByImm_PredicatedOp { - SVEBitwiseShiftByImm_PredicatedFixed = 0x04008000, - SVEBitwiseShiftByImm_PredicatedFMask = 0xFF30E000, - SVEBitwiseShiftByImm_PredicatedMask = 0xFF3FE000, - ASR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed, - LSR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00010000, - LSL_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00030000, - ASRD_z_p_zi = 
SVEBitwiseShiftByImm_PredicatedFixed | 0x00040000 -}; - -enum SVEBitwiseShiftByVector_PredicatedOp { - SVEBitwiseShiftByVector_PredicatedFixed = 0x04108000, - SVEBitwiseShiftByVector_PredicatedFMask = 0xFF38E000, - SVEBitwiseShiftByVector_PredicatedMask = 0xFF3FE000, - ASR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed, - LSR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00010000, - LSL_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00030000, - ASRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00040000, - LSRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00050000, - LSLR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00070000 -}; - -enum SVEBitwiseShiftByWideElements_PredicatedOp { - SVEBitwiseShiftByWideElements_PredicatedFixed = 0x04188000, - SVEBitwiseShiftByWideElements_PredicatedFMask = 0xFF38E000, - SVEBitwiseShiftByWideElements_PredicatedMask = 0xFF3FE000, - ASR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed, - LSR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00010000, - LSL_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00030000 -}; - -enum SVEBitwiseShiftUnpredicatedOp { - SVEBitwiseShiftUnpredicatedFixed = 0x04208000, - SVEBitwiseShiftUnpredicatedFMask = 0xFF20E000, - SVEBitwiseShiftUnpredicatedMask = 0xFF20FC00, - ASR_z_zw = SVEBitwiseShiftUnpredicatedFixed, - LSR_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000400, - LSL_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000C00, - ASR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001000, - LSR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001400, - LSL_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001C00 -}; - -enum SVEBroadcastBitmaskImmOp { - SVEBroadcastBitmaskImmFixed = 0x05C00000, - SVEBroadcastBitmaskImmFMask = 0xFFFC0000, - SVEBroadcastBitmaskImmMask = 0xFFFC0000, - DUPM_z_i = SVEBroadcastBitmaskImmFixed -}; - -enum SVEBroadcastFPImm_UnpredicatedOp { - SVEBroadcastFPImm_UnpredicatedFixed = 0x2539C000, - 
SVEBroadcastFPImm_UnpredicatedFMask = 0xFF39C000, - SVEBroadcastFPImm_UnpredicatedMask = 0xFF3FE000, - FDUP_z_i = SVEBroadcastFPImm_UnpredicatedFixed -}; - -enum SVEBroadcastGeneralRegisterOp { - SVEBroadcastGeneralRegisterFixed = 0x05203800, - SVEBroadcastGeneralRegisterFMask = 0xFF3FFC00, - SVEBroadcastGeneralRegisterMask = 0xFF3FFC00, - DUP_z_r = SVEBroadcastGeneralRegisterFixed -}; - -enum SVEBroadcastIndexElementOp { - SVEBroadcastIndexElementFixed = 0x05202000, - SVEBroadcastIndexElementFMask = 0xFF20FC00, - SVEBroadcastIndexElementMask = 0xFF20FC00, - DUP_z_zi = SVEBroadcastIndexElementFixed -}; - -enum SVEBroadcastIntImm_UnpredicatedOp { - SVEBroadcastIntImm_UnpredicatedFixed = 0x2538C000, - SVEBroadcastIntImm_UnpredicatedFMask = 0xFF39C000, - SVEBroadcastIntImm_UnpredicatedMask = 0xFF3FC000, - DUP_z_i = SVEBroadcastIntImm_UnpredicatedFixed -}; - -enum SVECompressActiveElementsOp { - SVECompressActiveElementsFixed = 0x05A18000, - SVECompressActiveElementsFMask = 0xFFBFE000, - SVECompressActiveElementsMask = 0xFFBFE000, - COMPACT_z_p_z = SVECompressActiveElementsFixed -}; - -enum SVEConditionallyBroadcastElementToVectorOp { - SVEConditionallyBroadcastElementToVectorFixed = 0x05288000, - SVEConditionallyBroadcastElementToVectorFMask = 0xFF3EE000, - SVEConditionallyBroadcastElementToVectorMask = 0xFF3FE000, - CLASTA_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed, - CLASTB_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed | 0x00010000 -}; - -enum SVEConditionallyExtractElementToGeneralRegisterOp { - SVEConditionallyExtractElementToGeneralRegisterFixed = 0x0530A000, - SVEConditionallyExtractElementToGeneralRegisterFMask = 0xFF3EE000, - SVEConditionallyExtractElementToGeneralRegisterMask = 0xFF3FE000, - CLASTA_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed, - CLASTB_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed | 0x00010000 -}; - -enum SVEConditionallyExtractElementToSIMDFPScalarOp { - 
SVEConditionallyExtractElementToSIMDFPScalarFixed = 0x052A8000, - SVEConditionallyExtractElementToSIMDFPScalarFMask = 0xFF3EE000, - SVEConditionallyExtractElementToSIMDFPScalarMask = 0xFF3FE000, - CLASTA_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed, - CLASTB_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed | 0x00010000 -}; - -enum SVEConditionallyTerminateScalarsOp { - SVEConditionallyTerminateScalarsFixed = 0x25202000, - SVEConditionallyTerminateScalarsFMask = 0xFF20FC0F, - SVEConditionallyTerminateScalarsMask = 0xFFA0FC1F, - CTERMEQ_rr = SVEConditionallyTerminateScalarsFixed | 0x00800000, - CTERMNE_rr = SVEConditionallyTerminateScalarsFixed | 0x00800010 -}; - -enum SVEConstructivePrefix_UnpredicatedOp { - SVEConstructivePrefix_UnpredicatedFixed = 0x0420BC00, - SVEConstructivePrefix_UnpredicatedFMask = 0xFF20FC00, - SVEConstructivePrefix_UnpredicatedMask = 0xFFFFFC00, - MOVPRFX_z_z = SVEConstructivePrefix_UnpredicatedFixed -}; - -enum SVEContiguousFirstFaultLoad_ScalarPlusScalarOp { - SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed = 0xA4006000, - SVEContiguousFirstFaultLoad_ScalarPlusScalarFMask = 0xFE00E000, - SVEContiguousFirstFaultLoad_ScalarPlusScalarMask = 0xFFE0E000, - LDFF1B_z_p_br_u8 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed, - LDFF1B_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00200000, - LDFF1B_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00400000, - LDFF1B_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00600000, - LDFF1SW_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00800000, - LDFF1H_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00A00000, - LDFF1H_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00C00000, - LDFF1H_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00E00000, - LDFF1SH_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01000000, - 
LDFF1SH_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01200000, - LDFF1W_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01400000, - LDFF1W_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01600000, - LDFF1SB_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01800000, - LDFF1SB_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01A00000, - LDFF1SB_z_p_br_s16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01C00000, - LDFF1D_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01E00000 -}; - -enum SVEContiguousLoad_ScalarPlusImmOp { - SVEContiguousLoad_ScalarPlusImmFixed = 0xA400A000, - SVEContiguousLoad_ScalarPlusImmFMask = 0xFE10E000, - SVEContiguousLoad_ScalarPlusImmMask = 0xFFF0E000, - LD1B_z_p_bi_u8 = SVEContiguousLoad_ScalarPlusImmFixed, - LD1B_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00200000, - LD1B_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00400000, - LD1B_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00600000, - LD1SW_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00800000, - LD1H_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00A00000, - LD1H_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00C00000, - LD1H_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00E00000, - LD1SH_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01000000, - LD1SH_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01200000, - LD1W_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01400000, - LD1W_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01600000, - LD1SB_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01800000, - LD1SB_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01A00000, - LD1SB_z_p_bi_s16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01C00000, - LD1D_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01E00000 -}; - -enum SVEContiguousLoad_ScalarPlusScalarOp { - 
SVEContiguousLoad_ScalarPlusScalarFixed = 0xA4004000, - SVEContiguousLoad_ScalarPlusScalarFMask = 0xFE00E000, - SVEContiguousLoad_ScalarPlusScalarMask = 0xFFE0E000, - LD1B_z_p_br_u8 = SVEContiguousLoad_ScalarPlusScalarFixed, - LD1B_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00200000, - LD1B_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00400000, - LD1B_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00600000, - LD1SW_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00800000, - LD1H_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00A00000, - LD1H_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00C00000, - LD1H_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00E00000, - LD1SH_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01000000, - LD1SH_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01200000, - LD1W_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01400000, - LD1W_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01600000, - LD1SB_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01800000, - LD1SB_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01A00000, - LD1SB_z_p_br_s16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01C00000, - LD1D_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01E00000 -}; - -enum SVEContiguousNonFaultLoad_ScalarPlusImmOp { - SVEContiguousNonFaultLoad_ScalarPlusImmFixed = 0xA410A000, - SVEContiguousNonFaultLoad_ScalarPlusImmFMask = 0xFE10E000, - SVEContiguousNonFaultLoad_ScalarPlusImmMask = 0xFFF0E000, - LDNF1B_z_p_bi_u8 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed, - LDNF1B_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00200000, - LDNF1B_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00400000, - LDNF1B_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00600000, - LDNF1SW_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00800000, - LDNF1H_z_p_bi_u16 = 
SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00A00000, - LDNF1H_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00C00000, - LDNF1H_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00E00000, - LDNF1SH_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01000000, - LDNF1SH_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01200000, - LDNF1W_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01400000, - LDNF1W_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01600000, - LDNF1SB_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01800000, - LDNF1SB_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01A00000, - LDNF1SB_z_p_bi_s16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01C00000, - LDNF1D_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01E00000 -}; - -enum SVEContiguousNonTemporalLoad_ScalarPlusImmOp { - SVEContiguousNonTemporalLoad_ScalarPlusImmFixed = 0xA400E000, - SVEContiguousNonTemporalLoad_ScalarPlusImmFMask = 0xFE70E000, - SVEContiguousNonTemporalLoad_ScalarPlusImmMask = 0xFFF0E000, - LDNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed, - LDNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x00800000, - LDNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01000000, - LDNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01800000 -}; - -enum SVEContiguousNonTemporalLoad_ScalarPlusScalarOp { - SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed = 0xA400C000, - SVEContiguousNonTemporalLoad_ScalarPlusScalarFMask = 0xFE60E000, - SVEContiguousNonTemporalLoad_ScalarPlusScalarMask = 0xFFE0E000, - LDNT1B_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed, - LDNT1H_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x00800000, - LDNT1W_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 
0x01000000, - LDNT1D_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01800000 -}; - -enum SVEContiguousNonTemporalStore_ScalarPlusImmOp { - SVEContiguousNonTemporalStore_ScalarPlusImmFixed = 0xE410E000, - SVEContiguousNonTemporalStore_ScalarPlusImmFMask = 0xFE70E000, - SVEContiguousNonTemporalStore_ScalarPlusImmMask = 0xFFF0E000, - STNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed, - STNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x00800000, - STNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01000000, - STNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01800000 -}; - -enum SVEContiguousNonTemporalStore_ScalarPlusScalarOp { - SVEContiguousNonTemporalStore_ScalarPlusScalarFixed = 0xE4006000, - SVEContiguousNonTemporalStore_ScalarPlusScalarFMask = 0xFE60E000, - SVEContiguousNonTemporalStore_ScalarPlusScalarMask = 0xFFE0E000, - STNT1B_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed, - STNT1H_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x00800000, - STNT1W_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01000000, - STNT1D_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01800000 -}; - -enum SVEContiguousPrefetch_ScalarPlusImmOp { - SVEContiguousPrefetch_ScalarPlusImmFixed = 0x85C00000, - SVEContiguousPrefetch_ScalarPlusImmFMask = 0xFFC08010, - SVEContiguousPrefetch_ScalarPlusImmMask = 0xFFC0E010, - PRFB_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed, - PRFH_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00002000, - PRFW_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00004000, - PRFD_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00006000 -}; - -enum SVEContiguousPrefetch_ScalarPlusScalarOp { - SVEContiguousPrefetch_ScalarPlusScalarFixed = 0x8400C000, - 
SVEContiguousPrefetch_ScalarPlusScalarFMask = 0xFE60E010, - SVEContiguousPrefetch_ScalarPlusScalarMask = 0xFFE0E010, - PRFB_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed, - PRFH_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x00800000, - PRFW_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01000000, - PRFD_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01800000 -}; - -enum SVEContiguousStore_ScalarPlusImmOp { - SVEContiguousStore_ScalarPlusImmFixed = 0xE400E000, - SVEContiguousStore_ScalarPlusImmFMask = 0xFE10E000, - SVEContiguousStore_ScalarPlusImmMask = 0xFF90E000, - ST1B_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed, - ST1H_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x00800000, - ST1W_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01000000, - ST1D_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01800000 -}; - -enum SVEContiguousStore_ScalarPlusScalarOp { - SVEContiguousStore_ScalarPlusScalarFixed = 0xE4004000, - SVEContiguousStore_ScalarPlusScalarFMask = 0xFE00E000, - SVEContiguousStore_ScalarPlusScalarMask = 0xFF80E000, - ST1B_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed, - ST1H_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x00800000, - ST1W_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01000000, - ST1D_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01800000 -}; - -enum SVECopyFPImm_PredicatedOp { - SVECopyFPImm_PredicatedFixed = 0x0510C000, - SVECopyFPImm_PredicatedFMask = 0xFF30E000, - SVECopyFPImm_PredicatedMask = 0xFF30E000, - FCPY_z_p_i = SVECopyFPImm_PredicatedFixed -}; - -enum SVECopyGeneralRegisterToVector_PredicatedOp { - SVECopyGeneralRegisterToVector_PredicatedFixed = 0x0528A000, - SVECopyGeneralRegisterToVector_PredicatedFMask = 0xFF3FE000, - SVECopyGeneralRegisterToVector_PredicatedMask = 0xFF3FE000, - CPY_z_p_r = SVECopyGeneralRegisterToVector_PredicatedFixed -}; - -enum SVECopyIntImm_PredicatedOp { - SVECopyIntImm_PredicatedFixed = 0x05100000, - 
SVECopyIntImm_PredicatedFMask = 0xFF308000, - SVECopyIntImm_PredicatedMask = 0xFF308000, - CPY_z_p_i = SVECopyIntImm_PredicatedFixed -}; - -enum SVECopySIMDFPScalarRegisterToVector_PredicatedOp { - SVECopySIMDFPScalarRegisterToVector_PredicatedFixed = 0x05208000, - SVECopySIMDFPScalarRegisterToVector_PredicatedFMask = 0xFF3FE000, - SVECopySIMDFPScalarRegisterToVector_PredicatedMask = 0xFF3FE000, - CPY_z_p_v = SVECopySIMDFPScalarRegisterToVector_PredicatedFixed -}; - -enum SVEElementCountOp { - SVEElementCountFixed = 0x0420E000, - SVEElementCountFMask = 0xFF30F800, - SVEElementCountMask = 0xFFF0FC00, - CNTB_r_s = SVEElementCountFixed, - CNTH_r_s = SVEElementCountFixed | 0x00400000, - CNTW_r_s = SVEElementCountFixed | 0x00800000, - CNTD_r_s = SVEElementCountFixed | 0x00C00000 -}; - -enum SVEExtractElementToGeneralRegisterOp { - SVEExtractElementToGeneralRegisterFixed = 0x0520A000, - SVEExtractElementToGeneralRegisterFMask = 0xFF3EE000, - SVEExtractElementToGeneralRegisterMask = 0xFF3FE000, - LASTA_r_p_z = SVEExtractElementToGeneralRegisterFixed, - LASTB_r_p_z = SVEExtractElementToGeneralRegisterFixed | 0x00010000 -}; - -enum SVEExtractElementToSIMDFPScalarRegisterOp { - SVEExtractElementToSIMDFPScalarRegisterFixed = 0x05228000, - SVEExtractElementToSIMDFPScalarRegisterFMask = 0xFF3EE000, - SVEExtractElementToSIMDFPScalarRegisterMask = 0xFF3FE000, - LASTA_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed, - LASTB_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed | 0x00010000 -}; - -enum SVEFFRInitialiseOp { - SVEFFRInitialiseFixed = 0x252C9000, - SVEFFRInitialiseFMask = 0xFF3FFFFF, - SVEFFRInitialiseMask = 0xFFFFFFFF, - SETFFR_f = SVEFFRInitialiseFixed -}; - -enum SVEFFRWriteFromPredicateOp { - SVEFFRWriteFromPredicateFixed = 0x25289000, - SVEFFRWriteFromPredicateFMask = 0xFF3FFE1F, - SVEFFRWriteFromPredicateMask = 0xFFFFFE1F, - WRFFR_f_p = SVEFFRWriteFromPredicateFixed -}; - -enum SVEFPAccumulatingReductionOp { - SVEFPAccumulatingReductionFixed = 0x65182000, - 
SVEFPAccumulatingReductionFMask = 0xFF38E000, - SVEFPAccumulatingReductionMask = 0xFF3FE000, - FADDA_v_p_z = SVEFPAccumulatingReductionFixed -}; - -enum SVEFPArithmeticUnpredicatedOp { - SVEFPArithmeticUnpredicatedFixed = 0x65000000, - SVEFPArithmeticUnpredicatedFMask = 0xFF20E000, - SVEFPArithmeticUnpredicatedMask = 0xFF20FC00, - FADD_z_zz = SVEFPArithmeticUnpredicatedFixed, - FSUB_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000400, - FMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000800, - FTSMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000C00, - FRECPS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001800, - FRSQRTS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001C00 -}; - -enum SVEFPArithmeticWithImm_PredicatedOp { - SVEFPArithmeticWithImm_PredicatedFixed = 0x65188000, - SVEFPArithmeticWithImm_PredicatedFMask = 0xFF38E3C0, - SVEFPArithmeticWithImm_PredicatedMask = 0xFF3FE3C0, - FADD_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed, - FSUB_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00010000, - FMUL_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00020000, - FSUBR_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00030000, - FMAXNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00040000, - FMINNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00050000, - FMAX_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00060000, - FMIN_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00070000 -}; - -enum SVEFPArithmetic_PredicatedOp { - SVEFPArithmetic_PredicatedFixed = 0x65008000, - SVEFPArithmetic_PredicatedFMask = 0xFF30E000, - SVEFPArithmetic_PredicatedMask = 0xFF3FE000, - FADD_z_p_zz = SVEFPArithmetic_PredicatedFixed, - FSUB_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00010000, - FMUL_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00020000, - FSUBR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00030000, - FMAXNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00040000, - FMINNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00050000, - 
FMAX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00060000, - FMIN_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00070000, - FABD_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00080000, - FSCALE_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00090000, - FMULX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000A0000, - FDIVR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000C0000, - FDIV_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000D0000 -}; - -enum SVEFPCompareVectorsOp { - SVEFPCompareVectorsFixed = 0x65004000, - SVEFPCompareVectorsFMask = 0xFF204000, - SVEFPCompareVectorsMask = 0xFF20E010, - FCMGE_p_p_zz = SVEFPCompareVectorsFixed, - FCMGT_p_p_zz = SVEFPCompareVectorsFixed | 0x00000010, - FCMEQ_p_p_zz = SVEFPCompareVectorsFixed | 0x00002000, - FCMNE_p_p_zz = SVEFPCompareVectorsFixed | 0x00002010, - FCMUO_p_p_zz = SVEFPCompareVectorsFixed | 0x00008000, - FACGE_p_p_zz = SVEFPCompareVectorsFixed | 0x00008010, - FACGT_p_p_zz = SVEFPCompareVectorsFixed | 0x0000A010 -}; - -enum SVEFPCompareWithZeroOp { - SVEFPCompareWithZeroFixed = 0x65102000, - SVEFPCompareWithZeroFMask = 0xFF38E000, - SVEFPCompareWithZeroMask = 0xFF3FE010, - FCMGE_p_p_z0 = SVEFPCompareWithZeroFixed, - FCMGT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00000010, - FCMLT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010000, - FCMLE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010010, - FCMEQ_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00020000, - FCMNE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00030000 -}; - -enum SVEFPComplexAdditionOp { - SVEFPComplexAdditionFixed = 0x64008000, - SVEFPComplexAdditionFMask = 0xFF3EE000, - SVEFPComplexAdditionMask = 0xFF3EE000, - FCADD_z_p_zz = SVEFPComplexAdditionFixed -}; - -enum SVEFPComplexMulAddOp { - SVEFPComplexMulAddFixed = 0x64000000, - SVEFPComplexMulAddFMask = 0xFF208000, - SVEFPComplexMulAddMask = 0xFF208000, - FCMLA_z_p_zzz = SVEFPComplexMulAddFixed -}; - -enum SVEFPComplexMulAddIndexOp { - SVEFPComplexMulAddIndexFixed = 0x64201000, - SVEFPComplexMulAddIndexFMask = 0xFF20F000, - 
SVEFPComplexMulAddIndexMask = 0xFFE0F000, - FCMLA_z_zzzi_h = SVEFPComplexMulAddIndexFixed | 0x00800000, - FCMLA_z_zzzi_s = SVEFPComplexMulAddIndexFixed | 0x00C00000 -}; - -enum SVEFPConvertPrecisionOp { - SVEFPConvertPrecisionFixed = 0x6508A000, - SVEFPConvertPrecisionFMask = 0xFF3CE000, - SVEFPConvertPrecisionMask = 0xFFFFE000, - FCVT_z_p_z_s2h = SVEFPConvertPrecisionFixed | 0x00800000, - FCVT_z_p_z_h2s = SVEFPConvertPrecisionFixed | 0x00810000, - FCVT_z_p_z_d2h = SVEFPConvertPrecisionFixed | 0x00C00000, - FCVT_z_p_z_h2d = SVEFPConvertPrecisionFixed | 0x00C10000, - FCVT_z_p_z_d2s = SVEFPConvertPrecisionFixed | 0x00C20000, - FCVT_z_p_z_s2d = SVEFPConvertPrecisionFixed | 0x00C30000 -}; - -enum SVEFPConvertToIntOp { - SVEFPConvertToIntFixed = 0x6518A000, - SVEFPConvertToIntFMask = 0xFF38E000, - SVEFPConvertToIntMask = 0xFFFFE000, - FCVTZS_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00420000, - FCVTZU_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00430000, - FCVTZS_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00440000, - FCVTZU_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00450000, - FCVTZS_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00460000, - FCVTZU_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00470000, - FCVTZS_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00840000, - FCVTZU_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00850000, - FCVTZS_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C00000, - FCVTZU_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C10000, - FCVTZS_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C40000, - FCVTZU_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C50000, - FCVTZS_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C60000, - FCVTZU_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C70000 -}; - -enum SVEFPExponentialAcceleratorOp { - SVEFPExponentialAcceleratorFixed = 0x0420B800, - SVEFPExponentialAcceleratorFMask = 0xFF20FC00, - SVEFPExponentialAcceleratorMask = 0xFF3FFC00, - FEXPA_z_z = SVEFPExponentialAcceleratorFixed -}; - -enum SVEFPFastReductionOp { - SVEFPFastReductionFixed = 0x65002000, - 
SVEFPFastReductionFMask = 0xFF38E000, - SVEFPFastReductionMask = 0xFF3FE000, - FADDV_v_p_z = SVEFPFastReductionFixed, - FMAXNMV_v_p_z = SVEFPFastReductionFixed | 0x00040000, - FMINNMV_v_p_z = SVEFPFastReductionFixed | 0x00050000, - FMAXV_v_p_z = SVEFPFastReductionFixed | 0x00060000, - FMINV_v_p_z = SVEFPFastReductionFixed | 0x00070000 -}; - -enum SVEFPMulAddOp { - SVEFPMulAddFixed = 0x65200000, - SVEFPMulAddFMask = 0xFF200000, - SVEFPMulAddMask = 0xFF20E000, - FMLA_z_p_zzz = SVEFPMulAddFixed, - FMLS_z_p_zzz = SVEFPMulAddFixed | 0x00002000, - FNMLA_z_p_zzz = SVEFPMulAddFixed | 0x00004000, - FNMLS_z_p_zzz = SVEFPMulAddFixed | 0x00006000, - FMAD_z_p_zzz = SVEFPMulAddFixed | 0x00008000, - FMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000A000, - FNMAD_z_p_zzz = SVEFPMulAddFixed | 0x0000C000, - FNMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000E000 -}; - -enum SVEFPMulAddIndexOp { - SVEFPMulAddIndexFixed = 0x64200000, - SVEFPMulAddIndexFMask = 0xFF20F800, - SVEFPMulAddIndexMask = 0xFFE0FC00, - FMLA_z_zzzi_h = SVEFPMulAddIndexFixed, - FMLA_z_zzzi_h_i3h = FMLA_z_zzzi_h | 0x00400000, - FMLS_z_zzzi_h = SVEFPMulAddIndexFixed | 0x00000400, - FMLS_z_zzzi_h_i3h = FMLS_z_zzzi_h | 0x00400000, - FMLA_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800000, - FMLS_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800400, - FMLA_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00000, - FMLS_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00400 -}; - -enum SVEFPMulIndexOp { - SVEFPMulIndexFixed = 0x64202000, - SVEFPMulIndexFMask = 0xFF20FC00, - SVEFPMulIndexMask = 0xFFE0FC00, - FMUL_z_zzi_h = SVEFPMulIndexFixed, - FMUL_z_zzi_h_i3h = FMUL_z_zzi_h | 0x00400000, - FMUL_z_zzi_s = SVEFPMulIndexFixed | 0x00800000, - FMUL_z_zzi_d = SVEFPMulIndexFixed | 0x00C00000 -}; - -enum SVEFPRoundToIntegralValueOp { - SVEFPRoundToIntegralValueFixed = 0x6500A000, - SVEFPRoundToIntegralValueFMask = 0xFF38E000, - SVEFPRoundToIntegralValueMask = 0xFF3FE000, - FRINTN_z_p_z = SVEFPRoundToIntegralValueFixed, - FRINTP_z_p_z = SVEFPRoundToIntegralValueFixed | 
0x00010000, - FRINTM_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00020000, - FRINTZ_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00030000, - FRINTA_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00040000, - FRINTX_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00060000, - FRINTI_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00070000 -}; - -enum SVEFPTrigMulAddCoefficientOp { - SVEFPTrigMulAddCoefficientFixed = 0x65108000, - SVEFPTrigMulAddCoefficientFMask = 0xFF38FC00, - SVEFPTrigMulAddCoefficientMask = 0xFF38FC00, - FTMAD_z_zzi = SVEFPTrigMulAddCoefficientFixed -}; - -enum SVEFPTrigSelectCoefficientOp { - SVEFPTrigSelectCoefficientFixed = 0x0420B000, - SVEFPTrigSelectCoefficientFMask = 0xFF20F800, - SVEFPTrigSelectCoefficientMask = 0xFF20FC00, - FTSSEL_z_zz = SVEFPTrigSelectCoefficientFixed -}; - -enum SVEFPUnaryOpOp { - SVEFPUnaryOpFixed = 0x650CA000, - SVEFPUnaryOpFMask = 0xFF3CE000, - SVEFPUnaryOpMask = 0xFF3FE000, - FRECPX_z_p_z = SVEFPUnaryOpFixed, - FSQRT_z_p_z = SVEFPUnaryOpFixed | 0x00010000 -}; - -enum SVEFPUnaryOpUnpredicatedOp { - SVEFPUnaryOpUnpredicatedFixed = 0x65083000, - SVEFPUnaryOpUnpredicatedFMask = 0xFF38F000, - SVEFPUnaryOpUnpredicatedMask = 0xFF3FFC00, - FRECPE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00060000, - FRSQRTE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00070000 -}; - -enum SVEIncDecByPredicateCountOp { - SVEIncDecByPredicateCountFixed = 0x25288000, - SVEIncDecByPredicateCountFMask = 0xFF38F000, - SVEIncDecByPredicateCountMask = 0xFF3FFE00, - SQINCP_z_p_z = SVEIncDecByPredicateCountFixed, - SQINCP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00000800, - SQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00000C00, - UQINCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00010000, - UQINCP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00010800, - UQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00010C00, - SQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00020000, - SQDECP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00020800, - SQDECP_r_p_r_x = 
SVEIncDecByPredicateCountFixed | 0x00020C00, - UQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00030000, - UQDECP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00030800, - UQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00030C00, - INCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00040000, - INCP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00040800, - DECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00050000, - DECP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00050800 -}; - -enum SVEIncDecRegisterByElementCountOp { - SVEIncDecRegisterByElementCountFixed = 0x0430E000, - SVEIncDecRegisterByElementCountFMask = 0xFF30F800, - SVEIncDecRegisterByElementCountMask = 0xFFF0FC00, - INCB_r_rs = SVEIncDecRegisterByElementCountFixed, - DECB_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00000400, - INCH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400000, - DECH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400400, - INCW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800000, - DECW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800400, - INCD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00000, - DECD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00400 -}; - -enum SVEIncDecVectorByElementCountOp { - SVEIncDecVectorByElementCountFixed = 0x0430C000, - SVEIncDecVectorByElementCountFMask = 0xFF30F800, - SVEIncDecVectorByElementCountMask = 0xFFF0FC00, - INCH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400000, - DECH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400400, - INCW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800000, - DECW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800400, - INCD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00000, - DECD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00400 -}; - -enum SVEIndexGenerationOp { - SVEIndexGenerationFixed = 0x04204000, - SVEIndexGenerationFMask = 0xFF20F000, - SVEIndexGenerationMask = 0xFF20FC00, - INDEX_z_ii = SVEIndexGenerationFixed, - INDEX_z_ri = SVEIndexGenerationFixed | 
0x00000400, - INDEX_z_ir = SVEIndexGenerationFixed | 0x00000800, - INDEX_z_rr = SVEIndexGenerationFixed | 0x00000C00 -}; - -enum SVEInsertGeneralRegisterOp { - SVEInsertGeneralRegisterFixed = 0x05243800, - SVEInsertGeneralRegisterFMask = 0xFF3FFC00, - SVEInsertGeneralRegisterMask = 0xFF3FFC00, - INSR_z_r = SVEInsertGeneralRegisterFixed -}; - -enum SVEInsertSIMDFPScalarRegisterOp { - SVEInsertSIMDFPScalarRegisterFixed = 0x05343800, - SVEInsertSIMDFPScalarRegisterFMask = 0xFF3FFC00, - SVEInsertSIMDFPScalarRegisterMask = 0xFF3FFC00, - INSR_z_v = SVEInsertSIMDFPScalarRegisterFixed -}; - -enum SVEIntAddSubtractImm_UnpredicatedOp { - SVEIntAddSubtractImm_UnpredicatedFixed = 0x2520C000, - SVEIntAddSubtractImm_UnpredicatedFMask = 0xFF38C000, - SVEIntAddSubtractImm_UnpredicatedMask = 0xFF3FC000, - ADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed, - SUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00010000, - SUBR_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00030000, - SQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00040000, - UQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00050000, - SQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00060000, - UQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00070000 -}; - -enum SVEIntAddSubtractVectors_PredicatedOp { - SVEIntAddSubtractVectors_PredicatedFixed = 0x04000000, - SVEIntAddSubtractVectors_PredicatedFMask = 0xFF38E000, - SVEIntAddSubtractVectors_PredicatedMask = 0xFF3FE000, - ADD_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed, - SUB_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00010000, - SUBR_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00030000 -}; - -enum SVEIntArithmeticUnpredicatedOp { - SVEIntArithmeticUnpredicatedFixed = 0x04200000, - SVEIntArithmeticUnpredicatedFMask = 0xFF20E000, - SVEIntArithmeticUnpredicatedMask = 0xFF20FC00, - ADD_z_zz = SVEIntArithmeticUnpredicatedFixed, - SUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00000400, - SQADD_z_zz = 
SVEIntArithmeticUnpredicatedFixed | 0x00001000, - UQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001400, - SQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001800, - UQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001C00 -}; - -enum SVEIntCompareScalarCountAndLimitOp { - SVEIntCompareScalarCountAndLimitFixed = 0x25200000, - SVEIntCompareScalarCountAndLimitFMask = 0xFF20E000, - SVEIntCompareScalarCountAndLimitMask = 0xFF20EC10, - WHILELT_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000400, - WHILELE_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000410, - WHILELO_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C00, - WHILELS_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C10 -}; - -enum SVEIntCompareSignedImmOp { - SVEIntCompareSignedImmFixed = 0x25000000, - SVEIntCompareSignedImmFMask = 0xFF204000, - SVEIntCompareSignedImmMask = 0xFF20E010, - CMPGE_p_p_zi = SVEIntCompareSignedImmFixed, - CMPGT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00000010, - CMPLT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002000, - CMPLE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002010, - CMPEQ_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008000, - CMPNE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008010 -}; - -enum SVEIntCompareUnsignedImmOp { - SVEIntCompareUnsignedImmFixed = 0x24200000, - SVEIntCompareUnsignedImmFMask = 0xFF200000, - SVEIntCompareUnsignedImmMask = 0xFF202010, - CMPHS_p_p_zi = SVEIntCompareUnsignedImmFixed, - CMPHI_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00000010, - CMPLO_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002000, - CMPLS_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002010 -}; - -enum SVEIntCompareVectorsOp { - SVEIntCompareVectorsFixed = 0x24000000, - SVEIntCompareVectorsFMask = 0xFF200000, - SVEIntCompareVectorsMask = 0xFF20E010, - CMPHS_p_p_zz = SVEIntCompareVectorsFixed, - CMPHI_p_p_zz = SVEIntCompareVectorsFixed | 0x00000010, - CMPEQ_p_p_zw = SVEIntCompareVectorsFixed | 0x00002000, - CMPNE_p_p_zw = 
SVEIntCompareVectorsFixed | 0x00002010, - CMPGE_p_p_zw = SVEIntCompareVectorsFixed | 0x00004000, - CMPGT_p_p_zw = SVEIntCompareVectorsFixed | 0x00004010, - CMPLT_p_p_zw = SVEIntCompareVectorsFixed | 0x00006000, - CMPLE_p_p_zw = SVEIntCompareVectorsFixed | 0x00006010, - CMPGE_p_p_zz = SVEIntCompareVectorsFixed | 0x00008000, - CMPGT_p_p_zz = SVEIntCompareVectorsFixed | 0x00008010, - CMPEQ_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A000, - CMPNE_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A010, - CMPHS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C000, - CMPHI_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C010, - CMPLO_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E000, - CMPLS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E010 -}; - -enum SVEIntConvertToFPOp { - SVEIntConvertToFPFixed = 0x6510A000, - SVEIntConvertToFPFMask = 0xFF38E000, - SVEIntConvertToFPMask = 0xFFFFE000, - SCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00420000, - UCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00430000, - SCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00440000, - UCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00450000, - SCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00460000, - UCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00470000, - SCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00840000, - UCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00850000, - SCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C00000, - UCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C10000, - SCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C40000, - UCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C50000, - SCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C60000, - UCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C70000 -}; - -enum SVEIntDivideVectors_PredicatedOp { - SVEIntDivideVectors_PredicatedFixed = 0x04140000, - SVEIntDivideVectors_PredicatedFMask = 0xFF3CE000, - SVEIntDivideVectors_PredicatedMask = 0xFF3FE000, - SDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed, - UDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 
0x00010000, - SDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00020000, - UDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00030000 -}; - -enum SVEIntMinMaxDifference_PredicatedOp { - SVEIntMinMaxDifference_PredicatedFixed = 0x04080000, - SVEIntMinMaxDifference_PredicatedFMask = 0xFF38E000, - SVEIntMinMaxDifference_PredicatedMask = 0xFF3FE000, - SMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed, - UMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00010000, - SMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00020000, - UMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00030000, - SABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00040000, - UABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00050000 -}; - -enum SVEIntMinMaxImm_UnpredicatedOp { - SVEIntMinMaxImm_UnpredicatedFixed = 0x2528C000, - SVEIntMinMaxImm_UnpredicatedFMask = 0xFF38C000, - SVEIntMinMaxImm_UnpredicatedMask = 0xFF3FE000, - SMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed, - UMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00010000, - SMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00020000, - UMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00030000 -}; - -enum SVEIntMulAddPredicatedOp { - SVEIntMulAddPredicatedFixed = 0x04004000, - SVEIntMulAddPredicatedFMask = 0xFF204000, - SVEIntMulAddPredicatedMask = 0xFF20E000, - MLA_z_p_zzz = SVEIntMulAddPredicatedFixed, - MLS_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00002000, - MAD_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00008000, - MSB_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x0000A000 -}; - -enum SVEIntMulAddUnpredicatedOp { - SVEIntMulAddUnpredicatedFixed = 0x44000000, - SVEIntMulAddUnpredicatedFMask = 0xFF208000, - SVEIntMulAddUnpredicatedMask = 0xFF20FC00, - SDOT_z_zzz = SVEIntMulAddUnpredicatedFixed, - UDOT_z_zzz = SVEIntMulAddUnpredicatedFixed | 0x00000400 -}; - -enum SVEIntMulImm_UnpredicatedOp { - SVEIntMulImm_UnpredicatedFixed = 0x2530C000, - SVEIntMulImm_UnpredicatedFMask = 0xFF38C000, - 
SVEIntMulImm_UnpredicatedMask = 0xFF3FE000, - MUL_z_zi = SVEIntMulImm_UnpredicatedFixed -}; - -enum SVEIntMulVectors_PredicatedOp { - SVEIntMulVectors_PredicatedFixed = 0x04100000, - SVEIntMulVectors_PredicatedFMask = 0xFF3CE000, - SVEIntMulVectors_PredicatedMask = 0xFF3FE000, - MUL_z_p_zz = SVEIntMulVectors_PredicatedFixed, - SMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00020000, - UMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00030000 -}; - -enum SVEMovprfxOp { - SVEMovprfxFixed = 0x04002000, - SVEMovprfxFMask = 0xFF20E000, - SVEMovprfxMask = 0xFF3EE000, - MOVPRFX_z_p_z = SVEMovprfxFixed | 0x00100000 -}; - -enum SVEIntReductionOp { - SVEIntReductionFixed = 0x04002000, - SVEIntReductionFMask = 0xFF20E000, - SVEIntReductionMask = 0xFF3FE000, - SADDV_r_p_z = SVEIntReductionFixed, - UADDV_r_p_z = SVEIntReductionFixed | 0x00010000, - SMAXV_r_p_z = SVEIntReductionFixed | 0x00080000, - UMAXV_r_p_z = SVEIntReductionFixed | 0x00090000, - SMINV_r_p_z = SVEIntReductionFixed | 0x000A0000, - UMINV_r_p_z = SVEIntReductionFixed | 0x000B0000 -}; - -enum SVEIntReductionLogicalOp { - SVEIntReductionLogicalFixed = 0x04182000, - SVEIntReductionLogicalFMask = 0xFF38E000, - SVEIntReductionLogicalMask = 0xFF3FE000, - ORV_r_p_z = SVEIntReductionLogicalFixed | 0x00180000, - EORV_r_p_z = SVEIntReductionLogicalFixed | 0x00190000, - ANDV_r_p_z = SVEIntReductionLogicalFixed | 0x001A0000 -}; - -enum SVEIntUnaryArithmeticPredicatedOp { - SVEIntUnaryArithmeticPredicatedFixed = 0x0400A000, - SVEIntUnaryArithmeticPredicatedFMask = 0xFF20E000, - SVEIntUnaryArithmeticPredicatedMask = 0xFF3FE000, - SXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00100000, - UXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00110000, - SXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00120000, - UXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00130000, - SXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00140000, - UXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00150000, - 
ABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00160000, - NEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00170000, - CLS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00180000, - CLZ_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00190000, - CNT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001A0000, - CNOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001B0000, - FABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001C0000, - FNEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001D0000, - NOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001E0000 -}; - -enum SVELoadAndBroadcastElementOp { - SVELoadAndBroadcastElementFixed = 0x84408000, - SVELoadAndBroadcastElementFMask = 0xFE408000, - SVELoadAndBroadcastElementMask = 0xFFC0E000, - LD1RB_z_p_bi_u8 = SVELoadAndBroadcastElementFixed, - LD1RB_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00002000, - LD1RB_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00004000, - LD1RB_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00006000, - LD1RSW_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x00800000, - LD1RH_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00802000, - LD1RH_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00804000, - LD1RH_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00806000, - LD1RSH_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01000000, - LD1RSH_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01002000, - LD1RW_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x01004000, - LD1RW_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01006000, - LD1RSB_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01800000, - LD1RSB_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01802000, - LD1RSB_z_p_bi_s16 = SVELoadAndBroadcastElementFixed | 0x01804000, - LD1RD_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01806000 -}; - -enum SVELoadAndBroadcastQuadword_ScalarPlusImmOp { - SVELoadAndBroadcastQuadword_ScalarPlusImmFixed = 0xA4002000, - 
SVELoadAndBroadcastQuadword_ScalarPlusImmFMask = 0xFE10E000, - SVELoadAndBroadcastQuadword_ScalarPlusImmMask = 0xFFF0E000, - LD1RQB_z_p_bi_u8 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed, - LD1RQH_z_p_bi_u16 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x00800000, - LD1RQW_z_p_bi_u32 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01000000, - LD1RQD_z_p_bi_u64 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01800000 -}; - -enum SVELoadAndBroadcastQuadword_ScalarPlusScalarOp { - SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed = 0xA4000000, - SVELoadAndBroadcastQuadword_ScalarPlusScalarFMask = 0xFE00E000, - SVELoadAndBroadcastQuadword_ScalarPlusScalarMask = 0xFFE0E000, - LD1RQB_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed, - LD1RQH_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x00800000, - LD1RQW_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01000000, - LD1RQD_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01800000 -}; - -enum SVELoadMultipleStructures_ScalarPlusImmOp { - SVELoadMultipleStructures_ScalarPlusImmFixed = 0xA400E000, - SVELoadMultipleStructures_ScalarPlusImmFMask = 0xFE10E000, - SVELoadMultipleStructures_ScalarPlusImmMask = 0xFFF0E000, - LD2B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00200000, - LD3B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00400000, - LD4B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00600000, - LD2H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00A00000, - LD3H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00C00000, - LD4H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00E00000, - LD2W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01200000, - LD3W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01400000, - 
LD4W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01600000, - LD2D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01A00000, - LD3D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01C00000, - LD4D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01E00000 -}; - -enum SVELoadMultipleStructures_ScalarPlusScalarOp { - SVELoadMultipleStructures_ScalarPlusScalarFixed = 0xA400C000, - SVELoadMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000, - SVELoadMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000, - LD2B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00200000, - LD3B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00400000, - LD4B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00600000, - LD2H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00A00000, - LD3H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00C00000, - LD4H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00E00000, - LD2W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01200000, - LD3W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01400000, - LD4W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01600000, - LD2D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01A00000, - LD3D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01C00000, - LD4D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01E00000 -}; - -enum SVELoadPredicateRegisterOp { - SVELoadPredicateRegisterFixed = 0x85800000, - SVELoadPredicateRegisterFMask = 0xFFC0E010, - SVELoadPredicateRegisterMask = 0xFFC0E010, - LDR_p_bi = SVELoadPredicateRegisterFixed -}; - -enum SVELoadVectorRegisterOp { - SVELoadVectorRegisterFixed = 0x85804000, - SVELoadVectorRegisterFMask = 
0xFFC0E000, - SVELoadVectorRegisterMask = 0xFFC0E000, - LDR_z_bi = SVELoadVectorRegisterFixed -}; - -enum SVEMulIndexOp { - SVEMulIndexFixed = 0x44200000, - SVEMulIndexFMask = 0xFF200000, - SVEMulIndexMask = 0xFFE0FC00, - SDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800000, - UDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800400, - SDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00000, - UDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00400 -}; - -enum SVEPartitionBreakConditionOp { - SVEPartitionBreakConditionFixed = 0x25104000, - SVEPartitionBreakConditionFMask = 0xFF3FC200, - SVEPartitionBreakConditionMask = 0xFFFFC200, - BRKA_p_p_p = SVEPartitionBreakConditionFixed, - BRKAS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00400000, - BRKB_p_p_p = SVEPartitionBreakConditionFixed | 0x00800000, - BRKBS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00C00000 -}; - -enum SVEPermutePredicateElementsOp { - SVEPermutePredicateElementsFixed = 0x05204000, - SVEPermutePredicateElementsFMask = 0xFF30E210, - SVEPermutePredicateElementsMask = 0xFF30FE10, - ZIP1_p_pp = SVEPermutePredicateElementsFixed, - ZIP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000400, - UZP1_p_pp = SVEPermutePredicateElementsFixed | 0x00000800, - UZP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000C00, - TRN1_p_pp = SVEPermutePredicateElementsFixed | 0x00001000, - TRN2_p_pp = SVEPermutePredicateElementsFixed | 0x00001400 -}; - -enum SVEPermuteVectorExtractOp { - SVEPermuteVectorExtractFixed = 0x05200000, - SVEPermuteVectorExtractFMask = 0xFF20E000, - SVEPermuteVectorExtractMask = 0xFFE0E000, - EXT_z_zi_des = SVEPermuteVectorExtractFixed -}; - -enum SVEPermuteVectorInterleavingOp { - SVEPermuteVectorInterleavingFixed = 0x05206000, - SVEPermuteVectorInterleavingFMask = 0xFF20E000, - SVEPermuteVectorInterleavingMask = 0xFF20FC00, - ZIP1_z_zz = SVEPermuteVectorInterleavingFixed, - ZIP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000400, - UZP1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000800, - UZP2_z_zz = 
SVEPermuteVectorInterleavingFixed | 0x00000C00, - TRN1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001000, - TRN2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001400 -}; - -enum SVEPredicateCountOp { - SVEPredicateCountFixed = 0x25208000, - SVEPredicateCountFMask = 0xFF38C000, - SVEPredicateCountMask = 0xFF3FC200, - CNTP_r_p_p = SVEPredicateCountFixed -}; - -enum SVEPredicateFirstActiveOp { - SVEPredicateFirstActiveFixed = 0x2518C000, - SVEPredicateFirstActiveFMask = 0xFF3FFE10, - SVEPredicateFirstActiveMask = 0xFFFFFE10, - PFIRST_p_p_p = SVEPredicateFirstActiveFixed | 0x00400000 -}; - -enum SVEPredicateInitializeOp { - SVEPredicateInitializeFixed = 0x2518E000, - SVEPredicateInitializeFMask = 0xFF3EFC10, - SVEPredicateInitializeMask = 0xFF3FFC10, - SVEPredicateInitializeSetFlagsBit = 0x00010000, - PTRUE_p_s = SVEPredicateInitializeFixed | 0x00000000, - PTRUES_p_s = SVEPredicateInitializeFixed | SVEPredicateInitializeSetFlagsBit -}; - -enum SVEPredicateLogicalOp { - SVEPredicateLogicalFixed = 0x25004000, - SVEPredicateLogicalFMask = 0xFF30C000, - SVEPredicateLogicalMask = 0xFFF0C210, - SVEPredicateLogicalSetFlagsBit = 0x00400000, - AND_p_p_pp_z = SVEPredicateLogicalFixed, - ANDS_p_p_pp_z = AND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, - BIC_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000010, - BICS_p_p_pp_z = BIC_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, - EOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000200, - EORS_p_p_pp_z = EOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, - ORR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800000, - ORRS_p_p_pp_z = ORR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, - ORN_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800010, - ORNS_p_p_pp_z = ORN_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, - NAND_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800210, - NANDS_p_p_pp_z = NAND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, - NOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800200, - NORS_p_p_pp_z = NOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, - 
SEL_p_p_pp = SVEPredicateLogicalFixed | 0x00000210 -}; - -enum SVEPredicateNextActiveOp { - SVEPredicateNextActiveFixed = 0x2519C400, - SVEPredicateNextActiveFMask = 0xFF3FFE10, - SVEPredicateNextActiveMask = 0xFF3FFE10, - PNEXT_p_p_p = SVEPredicateNextActiveFixed -}; - -enum SVEPredicateReadFromFFR_PredicatedOp { - SVEPredicateReadFromFFR_PredicatedFixed = 0x2518F000, - SVEPredicateReadFromFFR_PredicatedFMask = 0xFF3FFE10, - SVEPredicateReadFromFFR_PredicatedMask = 0xFFFFFE10, - RDFFR_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed, - RDFFRS_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed | 0x00400000 -}; - -enum SVEPredicateReadFromFFR_UnpredicatedOp { - SVEPredicateReadFromFFR_UnpredicatedFixed = 0x2519F000, - SVEPredicateReadFromFFR_UnpredicatedFMask = 0xFF3FFFF0, - SVEPredicateReadFromFFR_UnpredicatedMask = 0xFFFFFFF0, - RDFFR_p_f = SVEPredicateReadFromFFR_UnpredicatedFixed -}; - -enum SVEPredicateTestOp { - SVEPredicateTestFixed = 0x2510C000, - SVEPredicateTestFMask = 0xFF3FC210, - SVEPredicateTestMask = 0xFFFFC21F, - PTEST_p_p = SVEPredicateTestFixed | 0x00400000 -}; - -enum SVEPredicateZeroOp { - SVEPredicateZeroFixed = 0x2518E400, - SVEPredicateZeroFMask = 0xFF3FFFF0, - SVEPredicateZeroMask = 0xFFFFFFF0, - PFALSE_p = SVEPredicateZeroFixed -}; - -enum SVEPropagateBreakOp { - SVEPropagateBreakFixed = 0x2500C000, - SVEPropagateBreakFMask = 0xFF30C000, - SVEPropagateBreakMask = 0xFFF0C210, - BRKPA_p_p_pp = SVEPropagateBreakFixed, - BRKPB_p_p_pp = SVEPropagateBreakFixed | 0x00000010, - BRKPAS_p_p_pp = SVEPropagateBreakFixed | 0x00400000, - BRKPBS_p_p_pp = SVEPropagateBreakFixed | 0x00400010 -}; - -enum SVEPropagateBreakToNextPartitionOp { - SVEPropagateBreakToNextPartitionFixed = 0x25184000, - SVEPropagateBreakToNextPartitionFMask = 0xFFBFC210, - SVEPropagateBreakToNextPartitionMask = 0xFFFFC210, - BRKN_p_p_pp = SVEPropagateBreakToNextPartitionFixed, - BRKNS_p_p_pp = SVEPropagateBreakToNextPartitionFixed | 0x00400000 -}; - -enum SVEReversePredicateElementsOp { 
- SVEReversePredicateElementsFixed = 0x05344000, - SVEReversePredicateElementsFMask = 0xFF3FFE10, - SVEReversePredicateElementsMask = 0xFF3FFE10, - REV_p_p = SVEReversePredicateElementsFixed -}; - -enum SVEReverseVectorElementsOp { - SVEReverseVectorElementsFixed = 0x05383800, - SVEReverseVectorElementsFMask = 0xFF3FFC00, - SVEReverseVectorElementsMask = 0xFF3FFC00, - REV_z_z = SVEReverseVectorElementsFixed -}; - -enum SVEReverseWithinElementsOp { - SVEReverseWithinElementsFixed = 0x05248000, - SVEReverseWithinElementsFMask = 0xFF3CE000, - SVEReverseWithinElementsMask = 0xFF3FE000, - REVB_z_z = SVEReverseWithinElementsFixed, - REVH_z_z = SVEReverseWithinElementsFixed | 0x00010000, - REVW_z_z = SVEReverseWithinElementsFixed | 0x00020000, - RBIT_z_p_z = SVEReverseWithinElementsFixed | 0x00030000 -}; - -enum SVESaturatingIncDecRegisterByElementCountOp { - SVESaturatingIncDecRegisterByElementCountFixed = 0x0420F000, - SVESaturatingIncDecRegisterByElementCountFMask = 0xFF20F000, - SVESaturatingIncDecRegisterByElementCountMask = 0xFFF0FC00, - SQINCB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed, - UQINCB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000400, - SQDECB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000800, - UQDECB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000C00, - SQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100000, - UQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100400, - SQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100800, - UQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100C00, - SQINCH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400000, - UQINCH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400400, - SQDECH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400800, - UQDECH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400C00, - 
SQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500000, - UQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500400, - SQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500800, - UQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500C00, - SQINCW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800000, - UQINCW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800400, - SQDECW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800800, - UQDECW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800C00, - SQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900000, - UQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900400, - SQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900800, - UQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900C00, - SQINCD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00000, - UQINCD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00400, - SQDECD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00800, - UQDECD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00C00, - SQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00000, - UQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00400, - SQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00800, - UQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00C00 -}; - -enum SVESaturatingIncDecVectorByElementCountOp { - SVESaturatingIncDecVectorByElementCountFixed = 0x0420C000, - SVESaturatingIncDecVectorByElementCountFMask = 0xFF30F000, - SVESaturatingIncDecVectorByElementCountMask = 0xFFF0FC00, - SQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400000, - UQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400400, - SQDECH_z_zs = 
SVESaturatingIncDecVectorByElementCountFixed | 0x00400800, - UQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400C00, - SQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800000, - UQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800400, - SQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800800, - UQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800C00, - SQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00000, - UQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00400, - SQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00800, - UQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00C00 -}; - -enum SVEStackFrameAdjustmentOp { - SVEStackFrameAdjustmentFixed = 0x04205000, - SVEStackFrameAdjustmentFMask = 0xFFA0F800, - SVEStackFrameAdjustmentMask = 0xFFE0F800, - ADDVL_r_ri = SVEStackFrameAdjustmentFixed, - ADDPL_r_ri = SVEStackFrameAdjustmentFixed | 0x00400000 -}; - -enum SVEStackFrameSizeOp { - SVEStackFrameSizeFixed = 0x04BF5000, - SVEStackFrameSizeFMask = 0xFFFFF800, - SVEStackFrameSizeMask = 0xFFFFF800, - RDVL_r_i = SVEStackFrameSizeFixed -}; - -enum SVEStoreMultipleStructures_ScalarPlusImmOp { - SVEStoreMultipleStructures_ScalarPlusImmFixed = 0xE410E000, - SVEStoreMultipleStructures_ScalarPlusImmFMask = 0xFE10E000, - SVEStoreMultipleStructures_ScalarPlusImmMask = 0xFFF0E000, - ST2B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00200000, - ST3B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00400000, - ST4B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00600000, - ST2H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00A00000, - ST3H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00C00000, - ST4H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00E00000, - ST2W_z_p_bi_contiguous = 
SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01200000, - ST3W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01400000, - ST4W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01600000, - ST2D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01A00000, - ST3D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01C00000, - ST4D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01E00000 -}; - -enum SVEStoreMultipleStructures_ScalarPlusScalarOp { - SVEStoreMultipleStructures_ScalarPlusScalarFixed = 0xE4006000, - SVEStoreMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000, - SVEStoreMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000, - ST2B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00200000, - ST3B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00400000, - ST4B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00600000, - ST2H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00A00000, - ST3H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00C00000, - ST4H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00E00000, - ST2W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01200000, - ST3W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01400000, - ST4W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01600000, - ST2D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01A00000, - ST3D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01C00000, - ST4D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01E00000 -}; - -enum SVEStorePredicateRegisterOp { - SVEStorePredicateRegisterFixed = 0xE5800000, - SVEStorePredicateRegisterFMask = 0xFFC0E010, - SVEStorePredicateRegisterMask = 
0xFFC0E010, - STR_p_bi = SVEStorePredicateRegisterFixed -}; - -enum SVEStoreVectorRegisterOp { - SVEStoreVectorRegisterFixed = 0xE5804000, - SVEStoreVectorRegisterFMask = 0xFFC0E000, - SVEStoreVectorRegisterMask = 0xFFC0E000, - STR_z_bi = SVEStoreVectorRegisterFixed -}; - -enum SVETableLookupOp { - SVETableLookupFixed = 0x05203000, - SVETableLookupFMask = 0xFF20FC00, - SVETableLookupMask = 0xFF20FC00, - TBL_z_zz_1 = SVETableLookupFixed -}; - -enum SVEUnpackPredicateElementsOp { - SVEUnpackPredicateElementsFixed = 0x05304000, - SVEUnpackPredicateElementsFMask = 0xFFFEFE10, - SVEUnpackPredicateElementsMask = 0xFFFFFE10, - PUNPKLO_p_p = SVEUnpackPredicateElementsFixed, - PUNPKHI_p_p = SVEUnpackPredicateElementsFixed | 0x00010000 -}; - -enum SVEUnpackVectorElementsOp { - SVEUnpackVectorElementsFixed = 0x05303800, - SVEUnpackVectorElementsFMask = 0xFF3CFC00, - SVEUnpackVectorElementsMask = 0xFF3FFC00, - SUNPKLO_z_z = SVEUnpackVectorElementsFixed, - SUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00010000, - UUNPKLO_z_z = SVEUnpackVectorElementsFixed | 0x00020000, - UUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00030000 -}; - -enum SVEVectorSelectOp { - SVEVectorSelectFixed = 0x0520C000, - SVEVectorSelectFMask = 0xFF20C000, - SVEVectorSelectMask = 0xFF20C000, - SEL_z_p_zz = SVEVectorSelectFixed -}; - -enum SVEVectorSplice_DestructiveOp { - SVEVectorSplice_DestructiveFixed = 0x052C8000, - SVEVectorSplice_DestructiveFMask = 0xFF3FE000, - SVEVectorSplice_DestructiveMask = 0xFF3FE000, - SPLICE_z_p_zz_des = SVEVectorSplice_DestructiveFixed -}; - enum ReservedOp { ReservedFixed = 0x00000000, ReservedFMask = 0x1E000000, ReservedMask = 0xFFFF0000, + UDF = ReservedFixed | 0x00000000 }; diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc index a31e010d..f5e4fca5 100644 --- a/src/aarch64/cpu-aarch64.cc +++ b/src/aarch64/cpu-aarch64.cc @@ -39,15 +39,10 @@ namespace aarch64 { const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned); const IDRegister::Field 
AA64PFR0::kAdvSIMD(20, Field::kSigned); -const IDRegister::Field AA64PFR0::kRAS(28); const IDRegister::Field AA64PFR0::kSVE(32); const IDRegister::Field AA64PFR0::kDIT(48); -const IDRegister::Field AA64PFR0::kCSV2(56); -const IDRegister::Field AA64PFR0::kCSV3(60); const IDRegister::Field AA64PFR1::kBT(0); -const IDRegister::Field AA64PFR1::kSSBS(4); -const IDRegister::Field AA64PFR1::kMTE(8); const IDRegister::Field AA64ISAR0::kAES(4); const IDRegister::Field AA64ISAR0::kSHA1(8); @@ -61,7 +56,6 @@ const IDRegister::Field AA64ISAR0::kSM4(40); const IDRegister::Field AA64ISAR0::kDP(44); const IDRegister::Field AA64ISAR0::kFHM(48); const IDRegister::Field AA64ISAR0::kTS(52); -const IDRegister::Field AA64ISAR0::kRNDR(60); const IDRegister::Field AA64ISAR1::kDPB(0); const IDRegister::Field AA64ISAR1::kAPA(4); @@ -74,41 +68,23 @@ const IDRegister::Field AA64ISAR1::kGPI(28); const IDRegister::Field AA64ISAR1::kFRINTTS(32); const IDRegister::Field AA64ISAR1::kSB(36); const IDRegister::Field AA64ISAR1::kSPECRES(40); -const IDRegister::Field AA64ISAR1::kBF16(44); -const IDRegister::Field AA64ISAR1::kDGH(48); -const IDRegister::Field AA64ISAR1::kI8MM(52); const IDRegister::Field AA64MMFR1::kLO(16); -const IDRegister::Field AA64MMFR2::kAT(32); - -const IDRegister::Field AA64ZFR0::kBF16(20); -const IDRegister::Field AA64ZFR0::kI8MM(44); -const IDRegister::Field AA64ZFR0::kF32MM(52); -const IDRegister::Field AA64ZFR0::kF64MM(56); - CPUFeatures AA64PFR0::GetCPUFeatures() const { CPUFeatures f; if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP); if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf); if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON); if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf); - if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS); if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE); if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT); - if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2); - if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM); - if (Get(kCSV3) 
>= 1) f.Combine(CPUFeatures::kCSV3); return f; } CPUFeatures AA64PFR1::GetCPUFeatures() const { CPUFeatures f; if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI); - if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS); - if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl); - if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions); - if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE); return f; } @@ -129,38 +105,20 @@ CPUFeatures AA64ISAR0::GetCPUFeatures() const { if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM); if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM); if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag); - if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG); return f; } CPUFeatures AA64ISAR1::GetCPUFeatures() const { CPUFeatures f; if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP); - if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP); if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT); if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma); if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc); if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm); if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt); - if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB); - if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES); - if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16); - if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH); - if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM); - - // Only one of these fields should be non-zero, but they have the same - // encodings, so merge the logic. - int apx = std::max(Get(kAPI), Get(kAPA)); - if (apx >= 1) { - f.Combine(CPUFeatures::kPAuth); - // APA (rather than API) indicates QARMA. 
- if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA); - if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC); - if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2); - if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC); - if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined); - } + if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth); + if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA); if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric); if (Get(kGPA) >= 1) { f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA); @@ -174,23 +132,6 @@ CPUFeatures AA64MMFR1::GetCPUFeatures() const { return f; } -CPUFeatures AA64MMFR2::GetCPUFeatures() const { - CPUFeatures f; - if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT); - return f; -} - -CPUFeatures AA64ZFR0::GetCPUFeatures() const { - // This register is only available with SVE, but reads-as-zero in its absence, - // so it's always safe to read it. - CPUFeatures f; - if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM); - if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM); - if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM); - if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16); - return f; -} - int IDRegister::Get(IDRegister::Field field) const { int msb = field.GetMsb(); int lsb = field.GetLsb(); @@ -208,8 +149,7 @@ int IDRegister::Get(IDRegister::Field field) const { CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() { CPUFeatures f; -#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \ - f.Combine(Read##NAME().GetCPUFeatures()); +#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures()); VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG) #undef VIXL_COMBINE_ID_REG return f; @@ -223,73 +163,49 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather // than explicit bits, but explicit bits allow us to identify features that // the toolchain doesn't know about. 
- static const CPUFeatures::Feature kFeatureBits[] = - {// Bits 0-7 - CPUFeatures::kFP, - CPUFeatures::kNEON, - CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track. - CPUFeatures::kAES, - CPUFeatures::kPmull1Q, - CPUFeatures::kSHA1, - CPUFeatures::kSHA2, - CPUFeatures::kCRC32, - // Bits 8-15 - CPUFeatures::kAtomics, - CPUFeatures::kFPHalf, - CPUFeatures::kNEONHalf, - CPUFeatures::kIDRegisterEmulation, - CPUFeatures::kRDM, - CPUFeatures::kJSCVT, - CPUFeatures::kFcma, - CPUFeatures::kRCpc, - // Bits 16-23 - CPUFeatures::kDCPoP, - CPUFeatures::kSHA3, - CPUFeatures::kSM3, - CPUFeatures::kSM4, - CPUFeatures::kDotProduct, - CPUFeatures::kSHA512, - CPUFeatures::kSVE, - CPUFeatures::kFHM, - // Bits 24-31 - CPUFeatures::kDIT, - CPUFeatures::kUSCAT, - CPUFeatures::kRCpcImm, - CPUFeatures::kFlagM, - CPUFeatures::kSSBSControl, - CPUFeatures::kSB, - CPUFeatures::kPAuth, - CPUFeatures::kPAuthGeneric, - // Bits 32-39 - CPUFeatures::kDCCVADP, - CPUFeatures::kNone, // "sve2" - CPUFeatures::kNone, // "sveaes" - CPUFeatures::kNone, // "svepmull" - CPUFeatures::kNone, // "svebitperm" - CPUFeatures::kNone, // "svesha3" - CPUFeatures::kNone, // "svesm4" - CPUFeatures::kFrintToFixedSizedInt, - // Bits 40-47 - CPUFeatures::kSVEI8MM, - CPUFeatures::kSVEF32MM, - CPUFeatures::kSVEF64MM, - CPUFeatures::kSVEBF16, - CPUFeatures::kI8MM, - CPUFeatures::kBF16, - CPUFeatures::kDGH, - CPUFeatures::kRNG, - // Bits 48+ - CPUFeatures::kBTI}; - - uint64_t hwcap_low32 = getauxval(AT_HWCAP); - uint64_t hwcap_high32 = getauxval(AT_HWCAP2); - VIXL_ASSERT(IsUint32(hwcap_low32)); - VIXL_ASSERT(IsUint32(hwcap_high32)); - uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32); - - VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64); - for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) { - if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]); + static const CPUFeatures::Feature kFeatureBits[] = { + // Bits 0-7 + CPUFeatures::kFP, + CPUFeatures::kNEON, + CPUFeatures::kNone, // "EVTSTRM", which 
VIXL doesn't track. + CPUFeatures::kAES, + CPUFeatures::kPmull1Q, + CPUFeatures::kSHA1, + CPUFeatures::kSHA2, + CPUFeatures::kCRC32, + // Bits 8-15 + CPUFeatures::kAtomics, + CPUFeatures::kFPHalf, + CPUFeatures::kNEONHalf, + CPUFeatures::kIDRegisterEmulation, + CPUFeatures::kRDM, + CPUFeatures::kJSCVT, + CPUFeatures::kFcma, + CPUFeatures::kRCpc, + // Bits 16-23 + CPUFeatures::kDCPoP, + CPUFeatures::kSHA3, + CPUFeatures::kSM3, + CPUFeatures::kSM4, + CPUFeatures::kDotProduct, + CPUFeatures::kSHA512, + CPUFeatures::kSVE, + CPUFeatures::kFHM, + // Bits 24-27 + CPUFeatures::kDIT, + CPUFeatures::kUSCAT, + CPUFeatures::kRCpcImm, + CPUFeatures::kFlagM + // Bits 28-31 are unassigned. + }; + static const size_t kFeatureBitCount = + sizeof(kFeatureBits) / sizeof(kFeatureBits[0]); + + unsigned long auxv = getauxval(AT_HWCAP); // NOLINT(runtime/int) + + VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte)); + for (size_t i = 0; i < kFeatureBitCount; i++) { + if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]); } #endif // VIXL_USE_LINUX_HWCAP @@ -302,17 +218,17 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( #ifdef __aarch64__ -#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ - NAME CPU::Read##NAME() { \ - uint64_t value = 0; \ - __asm__("mrs %0, " MRS_ARG : "=r"(value)); \ - return NAME(value); \ +#define VIXL_READ_ID_REG(NAME) \ + NAME CPU::Read##NAME() { \ + uint64_t value = 0; \ + __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \ + return NAME(value); \ } #else // __aarch64__ -#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ - NAME CPU::Read##NAME() { \ - VIXL_UNREACHABLE(); \ - return NAME(0); \ +#define VIXL_READ_ID_REG(NAME) \ + NAME CPU::Read##NAME() { \ + /* TODO: Use VIXL_UNREACHABLE once it works in release builds. */ \ + VIXL_ABORT(); \ } #endif // __aarch64__ @@ -366,27 +282,6 @@ uint32_t CPU::GetCacheType() { } -// Query the SVE vector length. This requires CPUFeatures::kSVE. 
-int CPU::ReadSVEVectorLengthInBits() { -#ifdef __aarch64__ - uint64_t vl; - // To support compilers that don't understand `rdvl`, encode the value - // directly and move it manually. - __asm__( - " .word 0x04bf5100\n" // rdvl x0, #8 - " mov %[vl], x0\n" - : [vl] "=r"(vl) - : - : "x0"); - VIXL_ASSERT(vl <= INT_MAX); - return static_cast<int>(vl); -#else - VIXL_UNREACHABLE(); - return 0; -#endif -} - - void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) { #ifdef __aarch64__ // Implement the cache synchronisation for all targets where AArch64 is the diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h index 2bf1e60f..d2b2ee87 100644 --- a/src/aarch64/cpu-aarch64.h +++ b/src/aarch64/cpu-aarch64.h @@ -56,11 +56,7 @@ class IDRegister { public: enum Type { kUnsigned, kSigned }; - // This needs to be constexpr so that fields have "constant initialisation". - // This avoids initialisation order problems when these values are used to - // (dynamically) initialise static variables, etc. 
- explicit constexpr Field(int lsb, Type type = kUnsigned) - : lsb_(lsb), type_(type) {} + explicit Field(int lsb, Type type = kUnsigned) : lsb_(lsb), type_(type) {} static const int kMaxWidthInBits = 4; @@ -96,11 +92,8 @@ class AA64PFR0 : public IDRegister { private: static const Field kFP; static const Field kAdvSIMD; - static const Field kRAS; static const Field kSVE; static const Field kDIT; - static const Field kCSV2; - static const Field kCSV3; }; class AA64PFR1 : public IDRegister { @@ -111,8 +104,6 @@ class AA64PFR1 : public IDRegister { private: static const Field kBT; - static const Field kSSBS; - static const Field kMTE; }; class AA64ISAR0 : public IDRegister { @@ -134,7 +125,6 @@ class AA64ISAR0 : public IDRegister { static const Field kDP; static const Field kFHM; static const Field kTS; - static const Field kRNDR; }; class AA64ISAR1 : public IDRegister { @@ -155,9 +145,6 @@ class AA64ISAR1 : public IDRegister { static const Field kFRINTTS; static const Field kSB; static const Field kSPECRES; - static const Field kBF16; - static const Field kDGH; - static const Field kI8MM; }; class AA64MMFR1 : public IDRegister { @@ -170,29 +157,6 @@ class AA64MMFR1 : public IDRegister { static const Field kLO; }; -class AA64MMFR2 : public IDRegister { - public: - explicit AA64MMFR2(uint64_t value) : IDRegister(value) {} - - CPUFeatures GetCPUFeatures() const; - - private: - static const Field kAT; -}; - -class AA64ZFR0 : public IDRegister { - public: - explicit AA64ZFR0(uint64_t value) : IDRegister(value) {} - - CPUFeatures GetCPUFeatures() const; - - private: - static const Field kBF16; - static const Field kI8MM; - static const Field kF32MM; - static const Field kF64MM; -}; - class CPU { public: // Initialise CPU support. @@ -220,9 +184,6 @@ class CPU { CPUFeatures::QueryIDRegistersOption option = CPUFeatures::kQueryIDRegistersIfAvailable); - // Query the SVE vector length. This requires CPUFeatures::kSVE. 
- static int ReadSVEVectorLengthInBits(); - // Handle tagged pointers. template <typename T> static T SetPointerTag(T pointer, uint64_t tag) { @@ -250,18 +211,14 @@ class CPU { } private: -#define VIXL_AARCH64_ID_REG_LIST(V) \ - V(AA64PFR0, "ID_AA64PFR0_EL1") \ - V(AA64PFR1, "ID_AA64PFR1_EL1") \ - V(AA64ISAR0, "ID_AA64ISAR0_EL1") \ - V(AA64ISAR1, "ID_AA64ISAR1_EL1") \ - V(AA64MMFR1, "ID_AA64MMFR1_EL1") \ - /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \ - /* read them, but some compilers don't accept the symbolic names. */ \ - V(AA64MMFR2, "S3_0_C0_C7_2") \ - V(AA64ZFR0, "S3_0_C0_C4_4") - -#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME(); +#define VIXL_AARCH64_ID_REG_LIST(V) \ + V(AA64PFR0) \ + V(AA64PFR1) \ + V(AA64ISAR0) \ + V(AA64ISAR1) \ + V(AA64MMFR1) + +#define VIXL_READ_ID_REG(NAME) static NAME Read##NAME(); // On native AArch64 platforms, read the named CPU ID registers. These require // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64 // platforms. diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc index abe63d39..474803a1 100644 --- a/src/aarch64/cpu-features-auditor-aarch64.cc +++ b/src/aarch64/cpu-features-auditor-aarch64.cc @@ -870,6 +870,7 @@ void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) { scope.Record(CPUFeatures::kFP); if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf); } + USE(instr); } void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) { @@ -1067,165 +1068,6 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) { USE(instr); } -// Most SVE visitors require only SVE. 
-#define VIXL_SIMPLE_SVE_VISITOR_LIST(V) \ - V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \ - V(SVE32BitGatherLoad_VectorPlusImm) \ - V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitGatherPrefetch_VectorPlusImm) \ - V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ - V(SVE32BitScatterStore_VectorPlusImm) \ - V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ - V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ - V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ - V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ - V(SVE64BitGatherLoad_VectorPlusImm) \ - V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ - V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ - V(SVE64BitGatherPrefetch_VectorPlusImm) \ - V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ - V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ - V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ - V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ - V(SVE64BitScatterStore_VectorPlusImm) \ - V(SVEAddressGeneration) \ - V(SVEBitwiseLogicalUnpredicated) \ - V(SVEBitwiseShiftUnpredicated) \ - V(SVEFFRInitialise) \ - V(SVEFFRWriteFromPredicate) \ - V(SVEFPAccumulatingReduction) \ - V(SVEFPArithmeticUnpredicated) \ - V(SVEFPCompareVectors) \ - V(SVEFPCompareWithZero) \ - V(SVEFPComplexAddition) \ - V(SVEFPComplexMulAdd) \ - V(SVEFPComplexMulAddIndex) \ - V(SVEFPFastReduction) \ - V(SVEFPMulIndex) \ - V(SVEFPMulAdd) \ - V(SVEFPMulAddIndex) \ - V(SVEFPUnaryOpUnpredicated) \ - V(SVEIncDecByPredicateCount) \ - V(SVEIndexGeneration) \ - V(SVEIntArithmeticUnpredicated) \ - V(SVEIntCompareSignedImm) \ - V(SVEIntCompareUnsignedImm) \ - V(SVEIntCompareVectors) \ - V(SVEIntMulAddPredicated) \ - V(SVEIntMulAddUnpredicated) \ 
- V(SVEIntReduction) \ - V(SVEIntUnaryArithmeticPredicated) \ - V(SVEMovprfx) \ - V(SVEMulIndex) \ - V(SVEPermuteVectorExtract) \ - V(SVEPermuteVectorInterleaving) \ - V(SVEPredicateCount) \ - V(SVEPredicateLogical) \ - V(SVEPropagateBreak) \ - V(SVEStackFrameAdjustment) \ - V(SVEStackFrameSize) \ - V(SVEVectorSelect) \ - V(SVEBitwiseLogical_Predicated) \ - V(SVEBitwiseLogicalWithImm_Unpredicated) \ - V(SVEBitwiseShiftByImm_Predicated) \ - V(SVEBitwiseShiftByVector_Predicated) \ - V(SVEBitwiseShiftByWideElements_Predicated) \ - V(SVEBroadcastBitmaskImm) \ - V(SVEBroadcastFPImm_Unpredicated) \ - V(SVEBroadcastGeneralRegister) \ - V(SVEBroadcastIndexElement) \ - V(SVEBroadcastIntImm_Unpredicated) \ - V(SVECompressActiveElements) \ - V(SVEConditionallyBroadcastElementToVector) \ - V(SVEConditionallyExtractElementToSIMDFPScalar) \ - V(SVEConditionallyExtractElementToGeneralRegister) \ - V(SVEConditionallyTerminateScalars) \ - V(SVEConstructivePrefix_Unpredicated) \ - V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ - V(SVEContiguousLoad_ScalarPlusImm) \ - V(SVEContiguousLoad_ScalarPlusScalar) \ - V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ - V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ - V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ - V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ - V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ - V(SVEContiguousPrefetch_ScalarPlusImm) \ - V(SVEContiguousPrefetch_ScalarPlusScalar) \ - V(SVEContiguousStore_ScalarPlusImm) \ - V(SVEContiguousStore_ScalarPlusScalar) \ - V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ - V(SVECopyFPImm_Predicated) \ - V(SVECopyGeneralRegisterToVector_Predicated) \ - V(SVECopyIntImm_Predicated) \ - V(SVEElementCount) \ - V(SVEExtractElementToSIMDFPScalarRegister) \ - V(SVEExtractElementToGeneralRegister) \ - V(SVEFPArithmetic_Predicated) \ - V(SVEFPArithmeticWithImm_Predicated) \ - V(SVEFPConvertPrecision) \ - V(SVEFPConvertToInt) \ - V(SVEFPExponentialAccelerator) \ - 
V(SVEFPRoundToIntegralValue) \ - V(SVEFPTrigMulAddCoefficient) \ - V(SVEFPTrigSelectCoefficient) \ - V(SVEFPUnaryOp) \ - V(SVEIncDecRegisterByElementCount) \ - V(SVEIncDecVectorByElementCount) \ - V(SVEInsertSIMDFPScalarRegister) \ - V(SVEInsertGeneralRegister) \ - V(SVEIntAddSubtractImm_Unpredicated) \ - V(SVEIntAddSubtractVectors_Predicated) \ - V(SVEIntCompareScalarCountAndLimit) \ - V(SVEIntConvertToFP) \ - V(SVEIntDivideVectors_Predicated) \ - V(SVEIntMinMaxImm_Unpredicated) \ - V(SVEIntMinMaxDifference_Predicated) \ - V(SVEIntMulImm_Unpredicated) \ - V(SVEIntMulVectors_Predicated) \ - V(SVELoadAndBroadcastElement) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ - V(SVELoadMultipleStructures_ScalarPlusImm) \ - V(SVELoadMultipleStructures_ScalarPlusScalar) \ - V(SVELoadPredicateRegister) \ - V(SVELoadVectorRegister) \ - V(SVEPartitionBreakCondition) \ - V(SVEPermutePredicateElements) \ - V(SVEPredicateFirstActive) \ - V(SVEPredicateInitialize) \ - V(SVEPredicateNextActive) \ - V(SVEPredicateReadFromFFR_Predicated) \ - V(SVEPredicateReadFromFFR_Unpredicated) \ - V(SVEPredicateTest) \ - V(SVEPredicateZero) \ - V(SVEPropagateBreakToNextPartition) \ - V(SVEReversePredicateElements) \ - V(SVEReverseVectorElements) \ - V(SVEReverseWithinElements) \ - V(SVESaturatingIncDecRegisterByElementCount) \ - V(SVESaturatingIncDecVectorByElementCount) \ - V(SVEStoreMultipleStructures_ScalarPlusImm) \ - V(SVEStoreMultipleStructures_ScalarPlusScalar) \ - V(SVEStorePredicateRegister) \ - V(SVEStoreVectorRegister) \ - V(SVETableLookup) \ - V(SVEUnpackPredicateElements) \ - V(SVEUnpackVectorElements) \ - V(SVEVectorSplice_Destructive) - -#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \ - void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \ - RecordInstructionFeaturesScope scope(this); \ - scope.Record(CPUFeatures::kSVE); \ - USE(instr); \ - } -VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR) -#undef 
VIXL_DEFINE_SIMPLE_SVE_VISITOR -#undef VIXL_SIMPLE_SVE_VISITOR_LIST - void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) { RecordInstructionFeaturesScope scope(this); if (instr->Mask(SystemHintFMask) == SystemHintFixed) { diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc index c6859bbc..ce1f33fb 100644 --- a/src/aarch64/decoder-aarch64.cc +++ b/src/aarch64/decoder-aarch64.cc @@ -182,45 +182,22 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) { case M: \ bit_extract_fn = &Instruction::ExtractBits<M>; \ break; - INSTANTIATE_TEMPLATE(0x000001e0); - INSTANTIATE_TEMPLATE(0x00000400); INSTANTIATE_TEMPLATE(0x00000800); INSTANTIATE_TEMPLATE(0x00000c00); - INSTANTIATE_TEMPLATE(0x00001000); - INSTANTIATE_TEMPLATE(0x00001800); INSTANTIATE_TEMPLATE(0x00001c00); INSTANTIATE_TEMPLATE(0x00004000); INSTANTIATE_TEMPLATE(0x00008000); INSTANTIATE_TEMPLATE(0x0000f000); INSTANTIATE_TEMPLATE(0x0000fc00); - INSTANTIATE_TEMPLATE(0x00060010); - INSTANTIATE_TEMPLATE(0x00093e00); - INSTANTIATE_TEMPLATE(0x000c1000); - INSTANTIATE_TEMPLATE(0x00100000); - INSTANTIATE_TEMPLATE(0x00101800); - INSTANTIATE_TEMPLATE(0x00140000); - INSTANTIATE_TEMPLATE(0x00180000); - INSTANTIATE_TEMPLATE(0x00181000); - INSTANTIATE_TEMPLATE(0x00190000); - INSTANTIATE_TEMPLATE(0x00191400); - INSTANTIATE_TEMPLATE(0x001c0000); - INSTANTIATE_TEMPLATE(0x001c1800); INSTANTIATE_TEMPLATE(0x001f0000); INSTANTIATE_TEMPLATE(0x0020fc00); INSTANTIATE_TEMPLATE(0x0038f000); INSTANTIATE_TEMPLATE(0x00400000); - INSTANTIATE_TEMPLATE(0x00400010); INSTANTIATE_TEMPLATE(0x0040f000); - INSTANTIATE_TEMPLATE(0x00500000); INSTANTIATE_TEMPLATE(0x00800000); - INSTANTIATE_TEMPLATE(0x00800010); - INSTANTIATE_TEMPLATE(0x00801800); - INSTANTIATE_TEMPLATE(0x009f0000); INSTANTIATE_TEMPLATE(0x00c00000); - INSTANTIATE_TEMPLATE(0x00c00010); INSTANTIATE_TEMPLATE(0x00cf8000); INSTANTIATE_TEMPLATE(0x00db0000); - INSTANTIATE_TEMPLATE(0x00dc0000); INSTANTIATE_TEMPLATE(0x00e00003); 
INSTANTIATE_TEMPLATE(0x00f80400); INSTANTIATE_TEMPLATE(0x01e00000); @@ -256,7 +233,6 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) { INSTANTIATE_TEMPLATE(0xc4400000); INSTANTIATE_TEMPLATE(0xc4c00000); INSTANTIATE_TEMPLATE(0xe0400000); - INSTANTIATE_TEMPLATE(0xe120e000); INSTANTIATE_TEMPLATE(0xe3c00000); INSTANTIATE_TEMPLATE(0xf1200000); #undef INSTANTIATE_TEMPLATE @@ -283,44 +259,20 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) { instantiated = true; \ } INSTANTIATE_TEMPLATE(0x0000001c, 0x00000000); - INSTANTIATE_TEMPLATE(0x00000210, 0x00000000); - INSTANTIATE_TEMPLATE(0x000003c0, 0x00000000); - INSTANTIATE_TEMPLATE(0x00001c00, 0x00000000); - INSTANTIATE_TEMPLATE(0x00001c0f, 0x00000000); INSTANTIATE_TEMPLATE(0x00003000, 0x00000000); INSTANTIATE_TEMPLATE(0x00007800, 0x00000000); - INSTANTIATE_TEMPLATE(0x0000e000, 0x0000a000); INSTANTIATE_TEMPLATE(0x0000f000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00030400, 0x00000000); INSTANTIATE_TEMPLATE(0x0003801f, 0x0000000d); - INSTANTIATE_TEMPLATE(0x00060210, 0x00000000); - INSTANTIATE_TEMPLATE(0x00060810, 0x00000000); - INSTANTIATE_TEMPLATE(0x00060a10, 0x00000000); - INSTANTIATE_TEMPLATE(0x00060bf0, 0x00000000); - INSTANTIATE_TEMPLATE(0x00061e10, 0x00000000); - INSTANTIATE_TEMPLATE(0x00061e10, 0x00000400); - INSTANTIATE_TEMPLATE(0x00070200, 0x00000000); - INSTANTIATE_TEMPLATE(0x000b1e10, 0x00000000); INSTANTIATE_TEMPLATE(0x000f0000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00130e1f, 0x00000000); - INSTANTIATE_TEMPLATE(0x00130fff, 0x00000000); - INSTANTIATE_TEMPLATE(0x00180000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00180000, 0x00100000); - INSTANTIATE_TEMPLATE(0x001e0000, 0x00000000); - INSTANTIATE_TEMPLATE(0x001f0000, 0x00000000); INSTANTIATE_TEMPLATE(0x001f0000, 0x001f0000); INSTANTIATE_TEMPLATE(0x0038e000, 0x00000000); INSTANTIATE_TEMPLATE(0x0039e000, 0x00002000); INSTANTIATE_TEMPLATE(0x003ae000, 0x00002000); INSTANTIATE_TEMPLATE(0x003ce000, 0x00042000); - 
INSTANTIATE_TEMPLATE(0x005f0000, 0x001f0000); INSTANTIATE_TEMPLATE(0x00780000, 0x00000000); - INSTANTIATE_TEMPLATE(0x00870210, 0x00000000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00000000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00800000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00c00000); - INSTANTIATE_TEMPLATE(0x00c00010, 0x00800000); - INSTANTIATE_TEMPLATE(0x00ca1e10, 0x00000000); INSTANTIATE_TEMPLATE(0x01000010, 0x00000000); INSTANTIATE_TEMPLATE(0x20000800, 0x00000000); INSTANTIATE_TEMPLATE(0x20008000, 0x00000000); @@ -360,16 +312,14 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) { bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // EitherOr optimisation: if there are only one or two patterns in the table, // try to optimise the node to exploit that. - size_t table_size = pattern_table_.size(); - if ((table_size <= 2) && (GetSampledBitsCount() > 1)) { + if ((pattern_table_.size() == 2) && (GetSampledBitsCount() > 1)) { // TODO: support 'x' in this optimisation by dropping the sampled bit // positions before making the mask/value. if ((strchr(pattern_table_[0].pattern, 'x') == NULL) && - ((table_size == 1) || - (strcmp(pattern_table_[1].pattern, "otherwise") == 0))) { + (strcmp(pattern_table_[1].pattern, "otherwise") == 0)) { // A pattern table consisting of a fixed pattern with no x's, and an - // "otherwise" or absent case. Optimise this into an instruction mask and - // value test. + // "otherwise" case. Optimise this into an instruction mask and value + // test. 
uint32_t single_decode_mask = 0; uint32_t single_decode_value = 0; std::vector<uint8_t> bits = GetSampledBits(); @@ -382,6 +332,7 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { single_decode_value |= 1U << bits[i]; } } + BitExtractFn bit_extract_fn = GetBitExtractFunction(single_decode_mask, single_decode_value); @@ -391,9 +342,7 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // Set DecodeNode for when the instruction after masking doesn't match the // value. - const char* doesnt_match_handler = - (table_size == 1) ? "VisitUnallocated" : pattern_table_[1].handler; - CompileNodeForBits(decoder, doesnt_match_handler, 0); + CompileNodeForBits(decoder, pattern_table_[1].handler, 0); // Set DecodeNode for when it does match. CompileNodeForBits(decoder, pattern_table_[0].handler, 1); diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h index 38540195..c0f47c36 100644 --- a/src/aarch64/decoder-aarch64.h +++ b/src/aarch64/decoder-aarch64.h @@ -38,239 +38,99 @@ // List macro containing all visitors needed by the decoder class. 
-#define VISITOR_LIST_THAT_RETURN(V) \ - V(AddSubExtended) \ - V(AddSubImmediate) \ - V(AddSubShifted) \ - V(AddSubWithCarry) \ - V(AtomicMemory) \ - V(Bitfield) \ - V(CompareBranch) \ - V(ConditionalBranch) \ - V(ConditionalCompareImmediate) \ - V(ConditionalCompareRegister) \ - V(ConditionalSelect) \ - V(Crypto2RegSHA) \ - V(Crypto3RegSHA) \ - V(CryptoAES) \ - V(DataProcessing1Source) \ - V(DataProcessing2Source) \ - V(DataProcessing3Source) \ - V(EvaluateIntoFlags) \ - V(Exception) \ - V(Extract) \ - V(FPCompare) \ - V(FPConditionalCompare) \ - V(FPConditionalSelect) \ - V(FPDataProcessing1Source) \ - V(FPDataProcessing2Source) \ - V(FPDataProcessing3Source) \ - V(FPFixedPointConvert) \ - V(FPImmediate) \ - V(FPIntegerConvert) \ - V(LoadLiteral) \ - V(LoadStoreExclusive) \ - V(LoadStorePAC) \ - V(LoadStorePairNonTemporal) \ - V(LoadStorePairOffset) \ - V(LoadStorePairPostIndex) \ - V(LoadStorePairPreIndex) \ - V(LoadStorePostIndex) \ - V(LoadStorePreIndex) \ - V(LoadStoreRCpcUnscaledOffset) \ - V(LoadStoreRegisterOffset) \ - V(LoadStoreUnscaledOffset) \ - V(LoadStoreUnsignedOffset) \ - V(LogicalImmediate) \ - V(LogicalShifted) \ - V(MoveWideImmediate) \ - V(NEON2RegMisc) \ - V(NEON2RegMiscFP16) \ - V(NEON3Different) \ - V(NEON3Same) \ - V(NEON3SameExtra) \ - V(NEON3SameFP16) \ - V(NEONAcrossLanes) \ - V(NEONByIndexedElement) \ - V(NEONCopy) \ - V(NEONExtract) \ - V(NEONLoadStoreMultiStruct) \ - V(NEONLoadStoreMultiStructPostIndex) \ - V(NEONLoadStoreSingleStruct) \ - V(NEONLoadStoreSingleStructPostIndex) \ - V(NEONModifiedImmediate) \ - V(NEONPerm) \ - V(NEONScalar2RegMisc) \ - V(NEONScalar2RegMiscFP16) \ - V(NEONScalar3Diff) \ - V(NEONScalar3Same) \ - V(NEONScalar3SameExtra) \ - V(NEONScalar3SameFP16) \ - V(NEONScalarByIndexedElement) \ - V(NEONScalarCopy) \ - V(NEONScalarPairwise) \ - V(NEONScalarShiftImmediate) \ - V(NEONShiftImmediate) \ - V(NEONTable) \ - V(PCRelAddressing) \ - V(RotateRightIntoFlags) \ - V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) 
\ - V(SVE32BitGatherLoad_VectorPlusImm) \ - V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitGatherPrefetch_VectorPlusImm) \ - V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ - V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ - V(SVE32BitScatterStore_VectorPlusImm) \ - V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ - V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ - V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ - V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ - V(SVE64BitGatherLoad_VectorPlusImm) \ - V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ - V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ - V(SVE64BitGatherPrefetch_VectorPlusImm) \ - V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ - V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ - V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ - V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ - V(SVE64BitScatterStore_VectorPlusImm) \ - V(SVEAddressGeneration) \ - V(SVEBitwiseLogicalUnpredicated) \ - V(SVEBitwiseShiftUnpredicated) \ - V(SVEFFRInitialise) \ - V(SVEFFRWriteFromPredicate) \ - V(SVEFPAccumulatingReduction) \ - V(SVEFPArithmeticUnpredicated) \ - V(SVEFPCompareVectors) \ - V(SVEFPCompareWithZero) \ - V(SVEFPComplexAddition) \ - V(SVEFPComplexMulAdd) \ - V(SVEFPComplexMulAddIndex) \ - V(SVEFPFastReduction) \ - V(SVEFPMulIndex) \ - V(SVEFPMulAdd) \ - V(SVEFPMulAddIndex) \ - V(SVEFPUnaryOpUnpredicated) \ - V(SVEIncDecByPredicateCount) \ - V(SVEIndexGeneration) \ - V(SVEIntArithmeticUnpredicated) \ - V(SVEIntCompareSignedImm) \ - V(SVEIntCompareUnsignedImm) \ - V(SVEIntCompareVectors) \ - V(SVEIntMulAddPredicated) \ - V(SVEIntMulAddUnpredicated) \ - V(SVEIntReduction) \ - V(SVEIntUnaryArithmeticPredicated) \ - V(SVEMovprfx) \ - V(SVEMulIndex) \ 
- V(SVEPermuteVectorExtract) \ - V(SVEPermuteVectorInterleaving) \ - V(SVEPredicateCount) \ - V(SVEPredicateLogical) \ - V(SVEPropagateBreak) \ - V(SVEStackFrameAdjustment) \ - V(SVEStackFrameSize) \ - V(SVEVectorSelect) \ - V(SVEBitwiseLogical_Predicated) \ - V(SVEBitwiseLogicalWithImm_Unpredicated) \ - V(SVEBitwiseShiftByImm_Predicated) \ - V(SVEBitwiseShiftByVector_Predicated) \ - V(SVEBitwiseShiftByWideElements_Predicated) \ - V(SVEBroadcastBitmaskImm) \ - V(SVEBroadcastFPImm_Unpredicated) \ - V(SVEBroadcastGeneralRegister) \ - V(SVEBroadcastIndexElement) \ - V(SVEBroadcastIntImm_Unpredicated) \ - V(SVECompressActiveElements) \ - V(SVEConditionallyBroadcastElementToVector) \ - V(SVEConditionallyExtractElementToSIMDFPScalar) \ - V(SVEConditionallyExtractElementToGeneralRegister) \ - V(SVEConditionallyTerminateScalars) \ - V(SVEConstructivePrefix_Unpredicated) \ - V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ - V(SVEContiguousLoad_ScalarPlusImm) \ - V(SVEContiguousLoad_ScalarPlusScalar) \ - V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ - V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ - V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ - V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ - V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ - V(SVEContiguousPrefetch_ScalarPlusImm) \ - V(SVEContiguousPrefetch_ScalarPlusScalar) \ - V(SVEContiguousStore_ScalarPlusImm) \ - V(SVEContiguousStore_ScalarPlusScalar) \ - V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ - V(SVECopyFPImm_Predicated) \ - V(SVECopyGeneralRegisterToVector_Predicated) \ - V(SVECopyIntImm_Predicated) \ - V(SVEElementCount) \ - V(SVEExtractElementToSIMDFPScalarRegister) \ - V(SVEExtractElementToGeneralRegister) \ - V(SVEFPArithmetic_Predicated) \ - V(SVEFPArithmeticWithImm_Predicated) \ - V(SVEFPConvertPrecision) \ - V(SVEFPConvertToInt) \ - V(SVEFPExponentialAccelerator) \ - V(SVEFPRoundToIntegralValue) \ - V(SVEFPTrigMulAddCoefficient) \ - V(SVEFPTrigSelectCoefficient) \ - V(SVEFPUnaryOp) \ 
- V(SVEIncDecRegisterByElementCount) \ - V(SVEIncDecVectorByElementCount) \ - V(SVEInsertSIMDFPScalarRegister) \ - V(SVEInsertGeneralRegister) \ - V(SVEIntAddSubtractImm_Unpredicated) \ - V(SVEIntAddSubtractVectors_Predicated) \ - V(SVEIntCompareScalarCountAndLimit) \ - V(SVEIntConvertToFP) \ - V(SVEIntDivideVectors_Predicated) \ - V(SVEIntMinMaxImm_Unpredicated) \ - V(SVEIntMinMaxDifference_Predicated) \ - V(SVEIntMulImm_Unpredicated) \ - V(SVEIntMulVectors_Predicated) \ - V(SVELoadAndBroadcastElement) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ - V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ - V(SVELoadMultipleStructures_ScalarPlusImm) \ - V(SVELoadMultipleStructures_ScalarPlusScalar) \ - V(SVELoadPredicateRegister) \ - V(SVELoadVectorRegister) \ - V(SVEPartitionBreakCondition) \ - V(SVEPermutePredicateElements) \ - V(SVEPredicateFirstActive) \ - V(SVEPredicateInitialize) \ - V(SVEPredicateNextActive) \ - V(SVEPredicateReadFromFFR_Predicated) \ - V(SVEPredicateReadFromFFR_Unpredicated) \ - V(SVEPredicateTest) \ - V(SVEPredicateZero) \ - V(SVEPropagateBreakToNextPartition) \ - V(SVEReversePredicateElements) \ - V(SVEReverseVectorElements) \ - V(SVEReverseWithinElements) \ - V(SVESaturatingIncDecRegisterByElementCount) \ - V(SVESaturatingIncDecVectorByElementCount) \ - V(SVEStoreMultipleStructures_ScalarPlusImm) \ - V(SVEStoreMultipleStructures_ScalarPlusScalar) \ - V(SVEStorePredicateRegister) \ - V(SVEStoreVectorRegister) \ - V(SVETableLookup) \ - V(SVEUnpackPredicateElements) \ - V(SVEUnpackVectorElements) \ - V(SVEVectorSplice_Destructive) \ - V(System) \ - V(TestBranch) \ - V(Unallocated) \ - V(UnconditionalBranch) \ - V(UnconditionalBranchToRegister) \ +#define VISITOR_LIST_THAT_RETURN(V) \ + V(AddSubExtended) \ + V(AddSubImmediate) \ + V(AddSubShifted) \ + V(AddSubWithCarry) \ + V(AtomicMemory) \ + V(Bitfield) \ + V(CompareBranch) \ + V(ConditionalBranch) \ + V(ConditionalCompareImmediate) \ + V(ConditionalCompareRegister) \ + V(ConditionalSelect) 
\ + V(Crypto2RegSHA) \ + V(Crypto3RegSHA) \ + V(CryptoAES) \ + V(DataProcessing1Source) \ + V(DataProcessing2Source) \ + V(DataProcessing3Source) \ + V(Exception) \ + V(Extract) \ + V(EvaluateIntoFlags) \ + V(FPCompare) \ + V(FPConditionalCompare) \ + V(FPConditionalSelect) \ + V(FPDataProcessing1Source) \ + V(FPDataProcessing2Source) \ + V(FPDataProcessing3Source) \ + V(FPFixedPointConvert) \ + V(FPImmediate) \ + V(FPIntegerConvert) \ + V(LoadLiteral) \ + V(LoadStoreExclusive) \ + V(LoadStorePAC) \ + V(LoadStorePairNonTemporal) \ + V(LoadStorePairOffset) \ + V(LoadStorePairPostIndex) \ + V(LoadStorePairPreIndex) \ + V(LoadStorePostIndex) \ + V(LoadStorePreIndex) \ + V(LoadStoreRCpcUnscaledOffset) \ + V(LoadStoreRegisterOffset) \ + V(LoadStoreUnscaledOffset) \ + V(LoadStoreUnsignedOffset) \ + V(LogicalImmediate) \ + V(LogicalShifted) \ + V(MoveWideImmediate) \ + V(NEON2RegMisc) \ + V(NEON2RegMiscFP16) \ + V(NEON3Different) \ + V(NEON3Same) \ + V(NEON3SameExtra) \ + V(NEON3SameFP16) \ + V(NEONAcrossLanes) \ + V(NEONByIndexedElement) \ + V(NEONCopy) \ + V(NEONExtract) \ + V(NEONLoadStoreMultiStruct) \ + V(NEONLoadStoreMultiStructPostIndex) \ + V(NEONLoadStoreSingleStruct) \ + V(NEONLoadStoreSingleStructPostIndex) \ + V(NEONModifiedImmediate) \ + V(NEONPerm) \ + V(NEONScalar2RegMisc) \ + V(NEONScalar2RegMiscFP16) \ + V(NEONScalar3Diff) \ + V(NEONScalar3Same) \ + V(NEONScalar3SameExtra) \ + V(NEONScalar3SameFP16) \ + V(NEONScalarByIndexedElement) \ + V(NEONScalarCopy) \ + V(NEONScalarPairwise) \ + V(NEONScalarShiftImmediate) \ + V(NEONShiftImmediate) \ + V(NEONTable) \ + V(PCRelAddressing) \ + V(RotateRightIntoFlags) \ + V(System) \ + V(TestBranch) \ + V(UnconditionalBranch) \ + V(UnconditionalBranchToRegister) + +// TODO: We shouldn't expose debug-only behaviour like this. Instead, we should +// use release-mode aborts where appropriate, and merge thse into a single +// no-return list. 
+#define VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \ + V(Unallocated) \ V(Unimplemented) #define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved) -#define VISITOR_LIST(V) \ - VISITOR_LIST_THAT_RETURN(V) \ +#define VISITOR_LIST(V) \ + VISITOR_LIST_THAT_RETURN(V) \ + VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \ VISITOR_LIST_THAT_DONT_RETURN(V) namespace vixl { @@ -278,12 +138,6 @@ namespace aarch64 { // The Visitor interface. Disassembler and simulator (and other tools) // must provide implementations for all of these functions. -// -// Note that this class must change in breaking ways with even minor additions -// to VIXL, and so its API should be considered unstable. User classes that -// inherit from this one should be expected to break even on minor version -// updates. If this is a problem, consider using DecoderVisitorWithDefaults -// instead. class DecoderVisitor { public: enum VisitorConstness { kConstVisitor, kNonConstVisitor }; @@ -306,25 +160,6 @@ class DecoderVisitor { const VisitorConstness constness_; }; -// As above, but a default (no-op) implementation for each visitor is provided. -// This is useful for derived class that only care about specific visitors. -// -// A minor version update may add a visitor, but will never remove one, so it is -// safe (and recommended) to use `override` in derived classes. 
-class DecoderVisitorWithDefaults : public DecoderVisitor { - public: - explicit DecoderVisitorWithDefaults( - VisitorConstness constness = kConstVisitor) - : DecoderVisitor(constness) {} - - virtual ~DecoderVisitorWithDefaults() {} - -#define DECLARE(A) \ - virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE { USE(instr); } - VISITOR_LIST(DECLARE) -#undef DECLARE -}; - class DecodeNode; class CompiledDecodeNode; @@ -422,7 +257,7 @@ class Decoder { }; const int kMaxDecodeSampledBits = 16; -const int kMaxDecodeMappings = 100; +const int kMaxDecodeMappings = 22; typedef void (Decoder::*DecodeFnPtr)(const Instruction*); typedef uint32_t (Instruction::*BitExtractFn)(void) const; diff --git a/src/aarch64/decoder-constants-aarch64.h b/src/aarch64/decoder-constants-aarch64.h index 53f283bb..def27fa1 100644 --- a/src/aarch64/decoder-constants-aarch64.h +++ b/src/aarch64/decoder-constants-aarch64.h @@ -39,7 +39,6 @@ static const DecodeMapping kDecodeMapping[] = { { "Root", {28, 27, 26, 25}, { {"0000", "DecodeReserved"}, - {"0010", "DecodeSVE"}, {"100x", "DecodeDataProcessingImmediate"}, {"101x", "DecodeBranchesExceptionAndSystem"}, {"x1x0", "DecodeLoadsAndStores"}, @@ -125,720 +124,6 @@ static const DecodeMapping kDecodeMapping[] = { }, }, - { "DecodeSVE", - {31, 30, 29, 24, 21, 15, 14, 13}, - { {"00000x1x", "VisitSVEIntMulAddPredicated"}, - {"00000000", "DecodeSVE00000000"}, - {"00000001", "DecodeSVE00000001"}, - {"00000100", "DecodeSVE00000100"}, - {"00000101", "VisitSVEIntUnaryArithmeticPredicated"}, - {"00001000", "VisitSVEIntArithmeticUnpredicated"}, - {"00001001", "VisitSVEBitwiseLogicalUnpredicated"}, - {"00001010", "DecodeSVE00001010"}, - {"00001100", "VisitSVEBitwiseShiftUnpredicated"}, - {"00001101", "DecodeSVE00001101"}, - {"00001110", "DecodeSVE00001110"}, - {"00001111", "DecodeSVE00001111"}, - {"000100xx", "DecodeSVE000100xx"}, - {"0001010x", "DecodeSVE0001010x"}, - {"00010110", "DecodeSVE00010110"}, - {"00010111", "DecodeSVE00010111"}, - {"00011000", 
"VisitSVEPermuteVectorExtract"}, - {"00011001", "DecodeSVE00011001"}, - {"00011010", "DecodeSVE00011010"}, - {"00011011", "VisitSVEPermuteVectorInterleaving"}, - {"00011100", "DecodeSVE00011100"}, - {"00011101", "DecodeSVE00011101"}, - {"0001111x", "VisitSVEVectorSelect"}, - {"00100xxx", "VisitSVEIntCompareVectors"}, - {"00101xxx", "VisitSVEIntCompareUnsignedImm"}, - {"00110x0x", "VisitSVEIntCompareSignedImm"}, - {"0011001x", "DecodeSVE0011001x"}, - {"00110110", "DecodeSVE00110110"}, - {"00110111", "DecodeSVE00110111"}, - {"00111000", "VisitSVEIntCompareScalarCountAndLimit"}, - {"00111001", "UnallocSVEConditionallyTerminateScalars"}, - {"00111100", "DecodeSVE00111100"}, - {"00111101", "UnallocSVEPredicateCount"}, - {"0011111x", "DecodeSVE0011111x"}, - {"010000xx", "VisitSVEIntMulAddUnpredicated"}, - {"01001xxx", "VisitSVEMulIndex"}, - {"011000xx", "VisitSVEFPComplexMulAdd"}, - {"01100100", "UnallocSVEFPComplexAddition"}, - {"01101000", "DecodeSVE01101000"}, - {"01101001", "UnallocSVEFPMulIndex"}, - {"01110x1x", "VisitSVEFPCompareVectors"}, - {"01110000", "VisitSVEFPArithmeticUnpredicated"}, - {"01110001", "DecodeSVE01110001"}, - {"01110100", "DecodeSVE01110100"}, - {"01110101", "DecodeSVE01110101"}, - {"01111xxx", "VisitSVEFPMulAdd"}, - {"100x010x", "UnallocSVELoadAndBroadcastElement"}, - {"100x0110", "DecodeSVE100x0110"}, - {"100x0111", "DecodeSVE100x0111"}, - {"100x11xx", "DecodeSVE100x11xx"}, - {"100000xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"100010xx", "DecodeSVE100010xx"}, - {"100100x1", "DecodeSVE100100x1"}, - {"10010000", "DecodeSVE10010000"}, - {"10010010", "DecodeSVE10010010"}, - {"100110x1", "DecodeSVE100110x1"}, - {"10011000", "DecodeSVE10011000"}, - {"10011010", "DecodeSVE10011010"}, - {"101xx000", "VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar"}, - {"101xx001", "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm"}, - {"101xx010", "VisitSVEContiguousLoad_ScalarPlusScalar"}, - {"101xx011", 
"VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar"}, - {"101xx101", "DecodeSVE101xx101"}, - {"101x0110", "DecodeSVE101x0110"}, - {"101x0111", "DecodeSVE101x0111"}, - {"101x1110", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, - {"101x1111", "DecodeSVE101x1111"}, - {"110x00xx", "VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets"}, - {"110x0111", "DecodeSVE110x0111"}, - {"1100010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"11000110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"110010xx", "DecodeSVE110010xx"}, - {"110011xx", "DecodeSVE110011xx"}, - {"1101010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"11010110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - {"110110xx", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, - {"110111xx", "DecodeSVE110111xx"}, - {"111x0011", "DecodeSVE111x0011"}, - {"111x01x0", "DecodeSVE111x01x0"}, - {"111x0101", "DecodeSVE111x0101"}, - {"111x0111", "DecodeSVE111x0111"}, - {"111x1011", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, - {"111x11x0", "DecodeSVE111x11x0"}, - {"111x1101", "DecodeSVE111x1101"}, - {"111x1111", "DecodeSVE111x1111"}, - {"1110x010", "VisitSVEContiguousStore_ScalarPlusScalar"}, - {"1111x000", "UnallocSVEStorePredicateRegister"}, - {"1111x010", "DecodeSVE1111x010"}, - }, - }, - - { "DecodeSVE00000000", - {20, 19, 18}, - { {"00x", "VisitSVEIntAddSubtractVectors_Predicated"}, - {"01x", "VisitSVEIntMinMaxDifference_Predicated"}, - {"100", "VisitSVEIntMulVectors_Predicated"}, - {"101", "VisitSVEIntDivideVectors_Predicated"}, - {"11x", "VisitSVEBitwiseLogical_Predicated"}, - }, - }, - - { "DecodeSVE00000100", - {20, 19}, - { {"0x", "VisitSVEBitwiseShiftByImm_Predicated"}, - {"10", "VisitSVEBitwiseShiftByVector_Predicated"}, - {"11", "VisitSVEBitwiseShiftByWideElements_Predicated"}, - }, - }, - - { "DecodeSVE00001010", - {23, 12, 11}, - { {"x0x", "VisitSVEIndexGeneration"}, - {"010", 
"VisitSVEStackFrameAdjustment"}, - {"110", "UnallocSVEStackFrameSize"}, - }, - }, - - { "UnallocSVEStackFrameSize", - {22, 20, 19, 18, 17, 16}, - { {"011111", "VisitSVEStackFrameSize"}, - }, - }, - - { "DecodeSVE00001101", - {12, 11, 10}, - { {"0xx", "VisitSVEAddressGeneration"}, - {"10x", "VisitSVEFPTrigSelectCoefficient"}, - {"110", "VisitSVEFPExponentialAccelerator"}, - {"111", "VisitSVEConstructivePrefix_Unpredicated"}, - }, - }, - - { "DecodeSVE00001110", - {20, 12, 11}, - { {"00x", "VisitSVESaturatingIncDecVectorByElementCount"}, - {"100", "VisitSVEIncDecVectorByElementCount"}, - }, - }, - - { "DecodeSVE00001111", - {20, 12, 11}, - { {"x1x", "VisitSVESaturatingIncDecRegisterByElementCount"}, - {"000", "VisitSVEElementCount"}, - {"100", "VisitSVEIncDecRegisterByElementCount"}, - }, - }, - - { "DecodeSVE000100xx", - {23, 22, 20, 19, 18}, - { {"xx1xx", "VisitSVECopyIntImm_Predicated"}, - {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, - }, - }, - - { "DecodeSVE0001010x", - {23, 22, 20, 19, 18}, - { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, - }, - }, - - { "DecodeSVE00010110", - {23, 22, 20, 19, 18}, - { {"xx1xx", "VisitSVECopyFPImm_Predicated"}, - {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, - }, - }, - - { "DecodeSVE00010111", - {23, 22, 20, 19, 18}, - { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, - {"11000", "VisitSVEBroadcastBitmaskImm"}, - }, - }, - - { "UnallocSVEBroadcastIndexElement", - {10}, - { {"0", "VisitSVEBroadcastIndexElement"}, - }, - }, - - { "UnallocSVETableLookup", - {10}, - { {"0", "VisitSVETableLookup"}, - }, - }, - - { 
"UnallocSVEBroadcastGeneralRegister", - {17, 16, 10}, - { {"000", "VisitSVEBroadcastGeneralRegister"}, - }, - }, - - { "UnallocSVEInsertGeneralRegister", - {17, 16, 10}, - { {"000", "VisitSVEInsertGeneralRegister"}, - }, - }, - - { "UnallocSVEUnpackVectorElements", - {10}, - { {"0", "VisitSVEUnpackVectorElements"}, - }, - }, - - { "UnallocSVEInsertSIMDFPScalarRegister", - {17, 16, 10}, - { {"000", "VisitSVEInsertSIMDFPScalarRegister"}, - }, - }, - - { "UnallocSVEReverseVectorElements", - {17, 16, 10}, - { {"000", "VisitSVEReverseVectorElements"}, - }, - }, - - { "DecodeSVE00011001", - {20, 19, 18, 12, 11}, - { {"xxx00", "UnallocSVEBroadcastIndexElement"}, - {"xxx10", "UnallocSVETableLookup"}, - {"00011", "UnallocSVEBroadcastGeneralRegister"}, - {"00111", "UnallocSVEInsertGeneralRegister"}, - {"10011", "UnallocSVEUnpackVectorElements"}, - {"10111", "UnallocSVEInsertSIMDFPScalarRegister"}, - {"11011", "UnallocSVEReverseVectorElements"}, - }, - }, - - { "UnallocSVEPermutePredicateElements", - {9, 4}, - { {"00", "VisitSVEPermutePredicateElements"}, - }, - }, - - { "UnallocSVEUnpackPredicateElements", - {23, 22, 19, 17, 12, 11, 10, 9, 4}, - { {"000000000", "VisitSVEUnpackPredicateElements"}, - }, - }, - - { "UnallocSVEReversePredicateElements", - {19, 17, 16, 12, 11, 10, 9, 4}, - { {"00000000", "VisitSVEReversePredicateElements"}, - }, - }, - - { "DecodeSVE00011010", - {20, 18}, - { {"0x", "UnallocSVEPermutePredicateElements"}, - {"10", "UnallocSVEUnpackPredicateElements"}, - {"11", "UnallocSVEReversePredicateElements"}, - }, - }, - - { "DecodeSVE00011100", - {23, 20, 19, 18, 17, 16}, - { {"x00000", "VisitSVECopySIMDFPScalarRegisterToVector_Predicated"}, - {"x0001x", "VisitSVEExtractElementToSIMDFPScalarRegister"}, - {"x001xx", "VisitSVEReverseWithinElements"}, - {"x0100x", "VisitSVEConditionallyBroadcastElementToVector"}, - {"x0101x", "VisitSVEConditionallyExtractElementToSIMDFPScalar"}, - {"x01100", "VisitSVEVectorSplice_Destructive"}, - {"100001", 
"VisitSVECompressActiveElements"}, - }, - }, - - { "DecodeSVE00011101", - {20, 19, 18, 17, 16}, - { {"0000x", "VisitSVEExtractElementToGeneralRegister"}, - {"01000", "VisitSVECopyGeneralRegisterToVector_Predicated"}, - {"1000x", "VisitSVEConditionallyExtractElementToGeneralRegister"}, - }, - }, - - { "UnallocSVEPartitionBreakCondition", - {18, 17, 16, 9}, - { {"0000", "VisitSVEPartitionBreakCondition"}, - }, - }, - - { "UnallocSVEPropagateBreakToNextPartition", - {23, 18, 17, 16, 9, 4}, - { {"000000", "VisitSVEPropagateBreakToNextPartition"}, - }, - }, - - { "DecodeSVE0011001x", - {20, 19}, - { {"0x", "VisitSVEPredicateLogical"}, - {"10", "UnallocSVEPartitionBreakCondition"}, - {"11", "UnallocSVEPropagateBreakToNextPartition"}, - }, - }, - - { "UnallocSVEPredicateTest", - {18, 17, 9, 4}, - { {"0000", "VisitSVEPredicateTest"}, - }, - }, - - { "UnallocSVEPredicateFirstActive", - {18, 17, 12, 11, 10, 9, 4}, - { {"0000000", "VisitSVEPredicateFirstActive"}, - }, - }, - - { "UnallocSVEPredicateNextActive", - {18, 17, 12, 11, 10, 9, 4}, - { {"0000100", "VisitSVEPredicateNextActive"}, - }, - }, - - { "DecodeSVE00110110", - {20, 19, 16}, - { {"0xx", "VisitSVEPropagateBreak"}, - {"100", "UnallocSVEPredicateTest"}, - {"110", "UnallocSVEPredicateFirstActive"}, - {"111", "UnallocSVEPredicateNextActive"}, - }, - }, - - { "UnallocSVEPredicateTest", - {18, 17, 9, 4}, - { {"0000", "VisitSVEPredicateTest"}, - }, - }, - - { "UnallocSVEPredicateInitialize", - {18, 17, 11, 4}, - { {"0000", "VisitSVEPredicateInitialize"}, - }, - }, - - { "UnallocSVEPredicateZero", - {18, 17, 11, 9, 8, 7, 6, 5, 4}, - { {"000000000", "VisitSVEPredicateZero"}, - }, - }, - - { "UnallocSVEPredicateReadFromFFR_Predicated", - {18, 17, 11, 9, 4}, - { {"00000", "VisitSVEPredicateReadFromFFR_Predicated"}, - }, - }, - - { "UnallocSVEPredicateReadFromFFR_Unpredicated", - {18, 17, 11, 9, 8, 7, 6, 5, 4}, - { {"000000000", "VisitSVEPredicateReadFromFFR_Unpredicated"}, - }, - }, - - { "DecodeSVE00110111", - {20, 19, 
16, 12, 10}, - { {"0xxxx", "VisitSVEPropagateBreak"}, - {"100xx", "UnallocSVEPredicateTest"}, - {"11x00", "UnallocSVEPredicateInitialize"}, - {"11001", "UnallocSVEPredicateZero"}, - {"11010", "UnallocSVEPredicateReadFromFFR_Predicated"}, - {"11110", "UnallocSVEPredicateReadFromFFR_Unpredicated"}, - }, - }, - - { "UnallocSVEConditionallyTerminateScalars", - {12, 11, 10, 3, 2, 1, 0}, - { {"0000000", "VisitSVEConditionallyTerminateScalars"}, - }, - }, - - { "UnallocSVEPredicateCount_2", - {20}, - { {"0", "VisitSVEPredicateCount"}, - }, - }, - - { "UnallocSVEIncDecByPredicateCount", - {20}, - { {"0", "VisitSVEIncDecByPredicateCount"}, - }, - }, - - { "UnallocSVEFFRWriteFromPredicate", - {20, 17, 16, 11, 10, 9, 4, 3, 2, 1, 0}, - { {"00000000000", "VisitSVEFFRWriteFromPredicate"}, - }, - }, - - { "UnallocSVEFFRInitialise", - {20, 17, 16, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, - { {"000000000000000", "VisitSVEFFRInitialise"}, - }, - }, - - { "DecodeSVE00111100", - {19, 18, 12}, - { {"0xx", "UnallocSVEPredicateCount_2"}, - {"1x0", "UnallocSVEIncDecByPredicateCount"}, - {"101", "UnallocSVEFFRWriteFromPredicate"}, - {"111", "UnallocSVEFFRInitialise"}, - }, - }, - - { "UnallocSVEPredicateCount", - {20, 19}, - { {"00", "VisitSVEPredicateCount"}, - }, - }, - - { "DecodeSVE0011111x", - {20, 19, 16}, - { {"00x", "VisitSVEIntAddSubtractImm_Unpredicated"}, - {"01x", "VisitSVEIntMinMaxImm_Unpredicated"}, - {"10x", "VisitSVEIntMulImm_Unpredicated"}, - {"110", "VisitSVEBroadcastIntImm_Unpredicated"}, - {"111", "VisitSVEBroadcastFPImm_Unpredicated"}, - }, - }, - - { "UnallocSVEFPComplexAddition", - {20, 19, 18, 17}, - { {"0000", "VisitSVEFPComplexAddition"}, - }, - }, - - { "DecodeSVE01101000", - {12, 11}, - { {"00", "VisitSVEFPMulAddIndex"}, - {"1x", "VisitSVEFPComplexMulAddIndex"}, - }, - }, - - { "UnallocSVEFPMulIndex", - {12, 11, 10}, - { {"000", "VisitSVEFPMulIndex"}, - }, - }, - - { "DecodeSVE01110001", - {20, 19, 12}, - { {"00x", "VisitSVEFPFastReduction"}, - {"011", 
"VisitSVEFPUnaryOpUnpredicated"}, - {"10x", "VisitSVEFPCompareWithZero"}, - {"11x", "VisitSVEFPAccumulatingReduction"}, - }, - }, - - { "UnallocSVEFPTrigMulAddCoefficient", - {12, 11, 10}, - { {"000", "VisitSVEFPTrigMulAddCoefficient"}, - }, - }, - - { "UnallocSVEFPArithmeticWithImm_Predicated", - {9, 8, 7, 6}, - { {"0000", "VisitSVEFPArithmeticWithImm_Predicated"}, - }, - }, - - { "DecodeSVE01110100", - {20, 19}, - { {"0x", "VisitSVEFPArithmetic_Predicated"}, - {"10", "UnallocSVEFPTrigMulAddCoefficient"}, - {"11", "UnallocSVEFPArithmeticWithImm_Predicated"}, - }, - }, - - { "DecodeSVE01110101", - {20, 19, 18}, - { {"00x", "VisitSVEFPRoundToIntegralValue"}, - {"010", "VisitSVEFPConvertPrecision"}, - {"011", "VisitSVEFPUnaryOp"}, - {"10x", "VisitSVEIntConvertToFP"}, - {"11x", "VisitSVEFPConvertToInt"}, - }, - }, - - { "UnallocSVELoadAndBroadcastElement", - {22}, - { {"1", "VisitSVELoadAndBroadcastElement"}, - }, - }, - - { "DecodeSVE100x0110", - {22, 4}, - { {"00", "VisitSVEContiguousPrefetch_ScalarPlusScalar"}, - {"1x", "VisitSVELoadAndBroadcastElement"}, - }, - }, - - { "DecodeSVE100x0111", - {22, 4}, - { {"00", "VisitSVE32BitGatherPrefetch_VectorPlusImm"}, - {"1x", "VisitSVELoadAndBroadcastElement"}, - }, - }, - - { "DecodeSVE100x11xx", - {22}, - { {"0", "VisitSVE32BitGatherLoad_VectorPlusImm"}, - {"1", "VisitSVELoadAndBroadcastElement"}, - }, - }, - - { "DecodeSVE100010xx", - {23, 4}, - { {"00", "VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets"}, - {"1x", "VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets"}, - }, - }, - - { "DecodeSVE100100x1", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE10010000", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"100", "VisitSVELoadPredicateRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE10010010", - 
{23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, - {"10x", "VisitSVELoadVectorRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE100110x1", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE10011000", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, - {"100", "VisitSVELoadPredicateRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE10011010", - {23, 22, 4}, - { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, - {"10x", "VisitSVELoadVectorRegister"}, - {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, - }, - }, - - { "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm", - {20}, - { {"0", "VisitSVELoadAndBroadcastQuadword_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE101xx101", - {20}, - { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, - {"1", "VisitSVEContiguousNonFaultLoad_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE101x0110", - {22}, - { {"0", "VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar"}, - {"1", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, - }, - }, - - { "DecodeSVE101x0111", - {22, 20}, - { {"00", "VisitSVEContiguousNonTemporalLoad_ScalarPlusImm"}, - {"10", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE101x1111", - {22, 20}, - { {"x0", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE110x0111", - {22, 4}, - { {"00", "VisitSVE64BitGatherPrefetch_VectorPlusImm"}, - {"1x", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - }, - }, - - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", - {22}, - { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - }, - }, - - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", - {22}, - { {"1", 
"VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - }, - }, - - { "DecodeSVE110010xx", - {23, 4}, - { {"00", "VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets"}, - {"1x", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, - }, - }, - - { "DecodeSVE110011xx", - {23, 22, 4}, - { {"x0x", "VisitSVE64BitGatherLoad_VectorPlusImm"}, - {"010", "VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets"}, - {"11x", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, - }, - }, - - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", - {22}, - { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - }, - }, - - { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", - {22}, - { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, - }, - }, - - { "DecodeSVE110111xx", - {22}, - { {"0", "VisitSVE64BitGatherLoad_VectorPlusImm"}, - {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, - }, - }, - - { "DecodeSVE111x0011", - {22}, - { {"0", "VisitSVEContiguousNonTemporalStore_ScalarPlusScalar"}, - {"1", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, - }, - }, - - { "DecodeSVE111x01x0", - {22}, - { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets"}, - {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets"}, - }, - }, - - { "DecodeSVE111x0101", - {22}, - { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets"}, - {"1", "VisitSVE64BitScatterStore_VectorPlusImm"}, - }, - }, - - { "DecodeSVE111x0111", - {22, 20}, - { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, - {"01", "VisitSVEContiguousNonTemporalStore_ScalarPlusImm"}, - {"11", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, - }, - }, - - { "DecodeSVE111x11x0", - {22}, - { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets"}, - {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets"}, - }, - }, - - { "DecodeSVE111x1101", - {22}, - { {"0", 
"VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets"}, - {"1", "VisitSVE32BitScatterStore_VectorPlusImm"}, - }, - }, - - { "DecodeSVE111x1111", - {22, 20}, - { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, - {"x1", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, - }, - }, - - { "UnallocSVEStorePredicateRegister", - {23, 22, 4}, - { {"100", "VisitSVEStorePredicateRegister"}, - }, - }, - - { "DecodeSVE1111x010", - {23, 22}, - { {"0x", "VisitSVEContiguousStore_ScalarPlusScalar"}, - {"10", "VisitSVEStoreVectorRegister"}, - {"11", "VisitSVEContiguousStore_ScalarPlusScalar"}, - }, - }, - { "DecodeNEONScalarAnd3SHA", {29, 23, 22, 15, 14, 11, 10}, { {"0xx0x00", "VisitCrypto3RegSHA"}, @@ -2103,28 +1388,6 @@ static const DecodeMapping kDecodeMapping[] = { {"otherwise", "VisitUnconditionalBranchToRegister"}, }, }, - - { "DecodeSVE101xxxxx", - {15, 14, 13}, - { {"101", "DecodeSVE101xx101"}, - {"010", "VisitSVEContiguousLoad_ScalarPlusScalar"}, - {"otherwise", "VisitSVEMemContiguousLoad"}, - }, - }, - - { "DecodeSVE101xx101", - {20}, - { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, - {"1", "VisitSVEMemContiguousLoad"}, - }, - }, - - { "DecodeSVE00000001", - {20, 19}, - { {"10", "VisitSVEMovprfx"}, - {"otherwise", "VisitSVEIntReduction"}, - }, - }, }; // clang-format on diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index d8ac2d24..7d6fa148 100644 --- a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -24,7 +24,6 @@ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include <bitset> #include <cstdlib> #include <sstream> @@ -957,7 +956,7 @@ void Disassembler::VisitTestBranch(const Instruction *instr) { // disassembled as Wt, otherwise Xt. As the top bit of the immediate is // encoded in bit 31 of the instruction, we can reuse the Rt form, which // uses bit 31 (normally "sf") to choose the register size. 
- const char *form = "'Rt, 'It, 'TImmTest"; + const char *form = "'Rt, 'IS, 'TImmTest"; switch (instr->Mask(TestBranchMask)) { case TBZ: @@ -1087,7 +1086,7 @@ void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) { #undef LS_UNSIGNEDOFFSET case PRFM_unsigned: mnemonic = "prfm"; - form = "'prefOp, ['Xns'ILU]"; + form = "'PrefOp, ['Xns'ILU]"; } Format(instr, mnemonic, form); } @@ -1166,7 +1165,7 @@ void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) { #undef LS_REGISTEROFFSET case PRFM_reg: mnemonic = "prfm"; - form = "'prefOp, ['Xns, 'Offsetreg]"; + form = "'PrefOp, ['Xns, 'Offsetreg]"; } Format(instr, mnemonic, form); } @@ -1181,7 +1180,7 @@ void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction *instr) { const char *form_s = "'St, ['Xns'ILS]"; const char *form_d = "'Dt, ['Xns'ILS]"; const char *form_q = "'Qt, ['Xns'ILS]"; - const char *form_prefetch = "'prefOp, ['Xns'ILS]"; + const char *form_prefetch = "'PrefOp, ['Xns'ILS]"; switch (instr->Mask(LoadStoreUnscaledOffsetMask)) { case STURB_w: @@ -1304,7 +1303,7 @@ void Disassembler::VisitLoadLiteral(const Instruction *instr) { } case PRFM_lit: { mnemonic = "prfm"; - form = "'prefOp, 'ILLiteral 'LValue"; + form = "'PrefOp, 'ILLiteral 'LValue"; break; } default: @@ -1487,14 +1486,14 @@ void Disassembler::VisitLoadStorePairNonTemporal(const Instruction *instr) { V(CASAH, "casah", "'Ws, 'Wt") \ V(CASLH, "caslh", "'Ws, 'Wt") \ V(CASALH, "casalh", "'Ws, 'Wt") \ - V(CASP_w, "casp", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASP_x, "casp", "'Xs, 'Xs+, 'Xt, 'Xt+") \ - V(CASPA_w, "caspa", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASPA_x, "caspa", "'Xs, 'Xs+, 'Xt, 'Xt+") \ - V(CASPL_w, "caspl", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASPL_x, "caspl", "'Xs, 'Xs+, 'Xt, 'Xt+") \ - V(CASPAL_w, "caspal", "'Ws, 'Ws+, 'Wt, 'Wt+") \ - V(CASPAL_x, "caspal", "'Xs, 'Xs+, 'Xt, 'Xt+") + V(CASP_w, "casp", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ + V(CASP_x, "casp", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ + V(CASPA_w, "caspa", "'Ws, 
'W(s+1), 'Wt, 'W(t+1)") \ + V(CASPA_x, "caspa", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ + V(CASPL_w, "caspl", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ + V(CASPL_x, "caspl", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ + V(CASPAL_w, "caspal", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ + V(CASPAL_x, "caspal", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") // clang-format on @@ -1899,15 +1898,15 @@ void Disassembler::VisitFPImmediate(const Instruction *instr) { switch (instr->Mask(FPImmediateMask)) { case FMOV_h_imm: mnemonic = "fmov"; - form = "'Hd, 'IFP"; + form = "'Hd, 'IFPHalf"; break; case FMOV_s_imm: mnemonic = "fmov"; - form = "'Sd, 'IFP"; + form = "'Sd, 'IFPSingle"; break; case FMOV_d_imm: mnemonic = "fmov"; - form = "'Dd, 'IFP"; + form = "'Dd, 'IFPDouble"; break; default: VIXL_UNREACHABLE(); @@ -3410,7 +3409,7 @@ void Disassembler::VisitNEONCopy(const Instruction *instr) { } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) { mnemonic = "smov"; nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); - form = "'R30d, 'Vn.%s['IVInsIndex1]"; + form = "'Rdq, 'Vn.%s['IVInsIndex1]"; } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) { mnemonic = "dup"; form = "'Vd.%s, 'Vn.%s['IVInsIndex1]"; @@ -4007,16 +4006,19 @@ void Disassembler::VisitNEONModifiedImmediate(const Instruction *instr) { } } else { // cmode<0> == '1' mnemonic = "fmov"; - form = "'Vt.%s, 'IFPNeon"; if (half_enc == 1) { + form = "'Vt.%s, 'IVMIImmFPHalf"; nfd.SetFormatMap(0, &map_h); } else if (op == 0) { + form = "'Vt.%s, 'IVMIImmFPSingle"; nfd.SetFormatMap(0, &map_s); - } else if (q == 1) { - form = "'Vt.2d, 'IFPNeon"; } else { - mnemonic = "unallocated"; - form = "(NEONModifiedImmediate)"; + if (q == 1) { + form = "'Vt.2d, 'IVMIImmFPDouble"; + } else { + mnemonic = "unallocated"; + form = "(NEONModifiedImmediate)"; + } } } } @@ -4924,4582 +4926,6 @@ void Disassembler::VisitNEONPerm(const Instruction *instr) { Format(instr, mnemonic, nfd.Substitute(form)); } -void Disassembler:: - 
VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]"; - - switch (instr->Mask( - SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) { - case LD1H_z_p_bz_s_x32_scaled: - mnemonic = "ld1h"; - break; - case LD1SH_z_p_bz_s_x32_scaled: - mnemonic = "ld1sh"; - break; - case LDFF1H_z_p_bz_s_x32_scaled: - mnemonic = "ldff1h"; - break; - case LDFF1SH_z_p_bz_s_x32_scaled: - mnemonic = "ldff1sh"; - break; - default: - form = "(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]"; - - switch ( - instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) { - case LD1W_z_p_bz_s_x32_scaled: - mnemonic = "ld1w"; - break; - case LDFF1W_z_p_bz_s_x32_scaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( - const Instruction *instr) { - const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]"; - - const char *mnemonic = "unimplemented"; - switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_s_x32_unscaled: - mnemonic = "ld1b"; - break; - case LD1H_z_p_bz_s_x32_unscaled: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bz_s_x32_unscaled: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_bz_s_x32_unscaled: - mnemonic = "ld1sh"; - break; - case LD1W_z_p_bz_s_x32_unscaled: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1b"; - break; - case LDFF1H_z_p_bz_s_x32_unscaled: - 
mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1sb"; - break; - case LDFF1SH_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1sh"; - break; - case LDFF1W_z_p_bz_s_x32_unscaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE32BitGatherLoad_VectorPlusImm( - const Instruction *instr) { - const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s]"; - const char *form_imm_b = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]"; - const char *form_imm_h = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*2]"; - const char *form_imm_w = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*4]"; - const char *form_imm; - - const char *mnemonic = "unimplemented"; - switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) { - case LD1B_z_p_ai_s: - mnemonic = "ld1b"; - form_imm = form_imm_b; - break; - case LD1H_z_p_ai_s: - mnemonic = "ld1h"; - form_imm = form_imm_h; - break; - case LD1SB_z_p_ai_s: - mnemonic = "ld1sb"; - form_imm = form_imm_b; - break; - case LD1SH_z_p_ai_s: - mnemonic = "ld1sh"; - form_imm = form_imm_h; - break; - case LD1W_z_p_ai_s: - mnemonic = "ld1w"; - form_imm = form_imm_w; - break; - case LDFF1B_z_p_ai_s: - mnemonic = "ldff1b"; - form_imm = form_imm_b; - break; - case LDFF1H_z_p_ai_s: - mnemonic = "ldff1h"; - form_imm = form_imm_h; - break; - case LDFF1SB_z_p_ai_s: - mnemonic = "ldff1sb"; - form_imm = form_imm_b; - break; - case LDFF1SH_z_p_ai_s: - mnemonic = "ldff1sh"; - form_imm = form_imm_h; - break; - case LDFF1W_z_p_ai_s: - mnemonic = "ldff1w"; - form_imm = form_imm_w; - break; - default: - form = "(SVE32BitGatherLoad_VectorPlusImm)"; - form_imm = form; - break; - } - if (instr->ExtractBits(20, 16) != 0) form = form_imm; - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'prefSVEOp, 
'Pgl, ['Xns, 'Zm.s, '?22:suxtw"; - const char *suffix = NULL; - - switch ( - instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) { - case PRFB_i_p_bz_s_x32_scaled: - mnemonic = "prfb"; - suffix = "]"; - break; - case PRFD_i_p_bz_s_x32_scaled: - mnemonic = "prfd"; - suffix = " #3]"; - break; - case PRFH_i_p_bz_s_x32_scaled: - mnemonic = "prfh"; - suffix = " #1]"; - break; - case PRFW_i_p_bz_s_x32_scaled: - mnemonic = "prfw"; - suffix = " #2]"; - break; - default: - form = "(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = (instr->ExtractBits(20, 16) != 0) - ? "'prefSVEOp, 'Pgl, ['Zn.s, #'u2016]" - : "'prefSVEOp, 'Pgl, ['Zn.s]"; - - switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) { - case PRFB_i_p_ai_s: - mnemonic = "prfb"; - break; - case PRFD_i_p_ai_s: - mnemonic = "prfd"; - break; - case PRFH_i_p_ai_s: - mnemonic = "prfh"; - break; - case PRFW_i_p_ai_s: - mnemonic = "prfw"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]"; - - switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) { - case ST1H_z_p_bz_s_x32_scaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_s_x32_scaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]"; - - switch ( - 
instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_s_x32_unscaled: - mnemonic = "st1b"; - break; - case ST1H_z_p_bz_s_x32_unscaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_s_x32_unscaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.s}, 'Pgl, ['Zn.s"; - const char *suffix = NULL; - - bool is_zero = instr->ExtractBits(20, 16) == 0; - - switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) { - case ST1B_z_p_ai_s: - mnemonic = "st1b"; - suffix = is_zero ? "]" : ", #'u2016]"; - break; - case ST1H_z_p_ai_s: - mnemonic = "st1h"; - suffix = is_zero ? "]" : ", #'u2016*2]"; - break; - case ST1W_z_p_ai_s: - mnemonic = "st1w"; - suffix = is_zero ? "]" : ", #'u2016*4]"; - break; - default: - form = "(SVE32BitScatterStore_VectorPlusImm)"; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw #'u2423]"; - - switch (instr->Mask( - SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { - case LD1D_z_p_bz_d_x32_scaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_x32_scaled: - mnemonic = "ld1h"; - break; - case LD1SH_z_p_bz_d_x32_scaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_x32_scaled: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bz_d_x32_scaled: - mnemonic = "ld1w"; - break; - case LDFF1D_z_p_bz_d_x32_scaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_x32_scaled: - mnemonic = "ldff1h"; - break; - case LDFF1SH_z_p_bz_d_x32_scaled: - mnemonic = "ldff1sh"; - break; - case 
LDFF1SW_z_p_bz_d_x32_scaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_x32_scaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]"; - - switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { - case LD1D_z_p_bz_d_64_scaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_64_scaled: - mnemonic = "ld1h"; - break; - case LD1SH_z_p_bz_d_64_scaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_64_scaled: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bz_d_64_scaled: - mnemonic = "ld1w"; - break; - case LDFF1D_z_p_bz_d_64_scaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_64_scaled: - mnemonic = "ldff1h"; - break; - case LDFF1SH_z_p_bz_d_64_scaled: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_bz_d_64_scaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_64_scaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]"; - - switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_d_64_unscaled: - mnemonic = "ld1b"; - break; - case LD1D_z_p_bz_d_64_unscaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_64_unscaled: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bz_d_64_unscaled: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_bz_d_64_unscaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_64_unscaled: - mnemonic = 
"ld1sw"; - break; - case LD1W_z_p_bz_d_64_unscaled: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_bz_d_64_unscaled: - mnemonic = "ldff1b"; - break; - case LDFF1D_z_p_bz_d_64_unscaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_64_unscaled: - mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_bz_d_64_unscaled: - mnemonic = "ldff1sb"; - break; - case LDFF1SH_z_p_bz_d_64_unscaled: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_bz_d_64_unscaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_64_unscaled: - mnemonic = "ldff1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler:: - VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]"; - - switch (instr->Mask( - SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_d_x32_unscaled: - mnemonic = "ld1b"; - break; - case LD1D_z_p_bz_d_x32_unscaled: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bz_d_x32_unscaled: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bz_d_x32_unscaled: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_bz_d_x32_unscaled: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bz_d_x32_unscaled: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bz_d_x32_unscaled: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1b"; - break; - case LDFF1D_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1sb"; - break; - case LDFF1SH_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_bz_d_x32_unscaled: - mnemonic = "ldff1w"; - break; - default: - form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)"; - break; - } - 
Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm( - const Instruction *instr) { - const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d]"; - const char *form_imm[4] = {"{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016]", - "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*2]", - "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*4]", - "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*8]"}; - - if (instr->ExtractBits(20, 16) != 0) { - unsigned msz = instr->ExtractBits(24, 23); - bool sign_extend = instr->ExtractBit(14) == 0; - if ((msz == kDRegSizeInBytesLog2) && sign_extend) { - form = "(SVE64BitGatherLoad_VectorPlusImm)"; - } else { - VIXL_ASSERT(msz < ArrayLength(form_imm)); - form = form_imm[msz]; - } - } - - const char *mnemonic = "unimplemented"; - switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) { - case LD1B_z_p_ai_d: - mnemonic = "ld1b"; - break; - case LD1D_z_p_ai_d: - mnemonic = "ld1d"; - break; - case LD1H_z_p_ai_d: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_ai_d: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_ai_d: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_ai_d: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_ai_d: - mnemonic = "ld1w"; - break; - case LDFF1B_z_p_ai_d: - mnemonic = "ldff1b"; - break; - case LDFF1D_z_p_ai_d: - mnemonic = "ldff1d"; - break; - case LDFF1H_z_p_ai_d: - mnemonic = "ldff1h"; - break; - case LDFF1SB_z_p_ai_d: - mnemonic = "ldff1sb"; - break; - case LDFF1SH_z_p_ai_d: - mnemonic = "ldff1sh"; - break; - case LDFF1SW_z_p_ai_d: - mnemonic = "ldff1sw"; - break; - case LDFF1W_z_p_ai_d: - mnemonic = "ldff1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)"; - - switch ( - instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) { - case PRFB_i_p_bz_d_64_scaled: - mnemonic = "prfb"; - 
form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d]"; - break; - case PRFD_i_p_bz_d_64_scaled: - mnemonic = "prfd"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #3]"; - break; - case PRFH_i_p_bz_d_64_scaled: - mnemonic = "prfh"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #1]"; - break; - case PRFW_i_p_bz_d_64_scaled: - mnemonic = "prfw"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #2]"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler:: - VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw"; - const char *suffix = NULL; - - switch (instr->Mask( - SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) { - case PRFB_i_p_bz_d_x32_scaled: - mnemonic = "prfb"; - suffix = " ]"; - break; - case PRFD_i_p_bz_d_x32_scaled: - mnemonic = "prfd"; - suffix = " #3]"; - break; - case PRFH_i_p_bz_d_x32_scaled: - mnemonic = "prfh"; - suffix = " #1]"; - break; - case PRFW_i_p_bz_d_x32_scaled: - mnemonic = "prfw"; - suffix = " #2]"; - break; - default: - form = "(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVE64BitGatherPrefetch_VectorPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = (instr->ExtractBits(20, 16) != 0) - ? 
"'prefSVEOp, 'Pgl, ['Zn.d, #'u2016]" - : "'prefSVEOp, 'Pgl, ['Zn.d]"; - - switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) { - case PRFB_i_p_ai_d: - mnemonic = "prfb"; - break; - case PRFD_i_p_ai_d: - mnemonic = "prfd"; - break; - case PRFH_i_p_ai_d: - mnemonic = "prfh"; - break; - case PRFW_i_p_ai_d: - mnemonic = "prfw"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]"; - - switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) { - case ST1D_z_p_bz_d_64_scaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_64_scaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_64_scaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]"; - - switch ( - instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_d_64_unscaled: - mnemonic = "st1b"; - break; - case ST1D_z_p_bz_d_64_unscaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_64_unscaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_64_unscaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffset)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler:: - VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]"; - - switch (instr->Mask( - 
SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) { - case ST1D_z_p_bz_d_x32_scaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_x32_scaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_x32_scaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler:: - VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]"; - - switch (instr->Mask( - SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_d_x32_unscaled: - mnemonic = "st1b"; - break; - case ST1D_z_p_bz_d_x32_unscaled: - mnemonic = "st1d"; - break; - case ST1H_z_p_bz_d_x32_unscaled: - mnemonic = "st1h"; - break; - case ST1W_z_p_bz_d_x32_unscaled: - mnemonic = "st1w"; - break; - default: - form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.d}, 'Pgl, ['Zn.d"; - const char *suffix = NULL; - - bool is_zero = instr->ExtractBits(20, 16) == 0; - - switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) { - case ST1B_z_p_ai_d: - mnemonic = "st1b"; - suffix = is_zero ? "]" : ", #'u2016]"; - break; - case ST1D_z_p_ai_d: - mnemonic = "st1d"; - suffix = is_zero ? "]" : ", #'u2016*8]"; - break; - case ST1H_z_p_ai_d: - mnemonic = "st1h"; - suffix = is_zero ? "]" : ", #'u2016*2]"; - break; - case ST1W_z_p_ai_d: - mnemonic = "st1w"; - suffix = is_zero ? 
"]" : ", #'u2016*4]"; - break; - default: - form = "(SVE64BitScatterStore_VectorPlusImm)"; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEBitwiseLogicalWithImm_Unpredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'tl, 'Zd.'tl, 'ITriSvel"; - - if (instr->GetSVEImmLogical() == 0) { - // The immediate encoded in the instruction is not in the expected format. - Format(instr, "unallocated", "(SVEBitwiseImm)"); - return; - } - - switch (instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { - case AND_z_zi: - mnemonic = "and"; - break; - case EOR_z_zi: - mnemonic = "eor"; - break; - case ORR_z_zi: - mnemonic = "orr"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBitwiseLogical_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) { - case AND_z_p_zz: - mnemonic = "and"; - break; - case BIC_z_p_zz: - mnemonic = "bic"; - break; - case EOR_z_p_zz: - mnemonic = "eor"; - break; - case ORR_z_p_zz: - mnemonic = "orr"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBitwiseShiftByImm_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, 'ITriSveq"; - unsigned tsize = (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(9, 8); - - if (tsize == 0) { - form = "(SVEBitwiseShiftByImm_Predicated)"; - } else { - switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) { - case ASRD_z_p_zi: - mnemonic = "asrd"; - break; - case ASR_z_p_zi: - mnemonic = "asr"; - break; - case LSL_z_p_zi: - mnemonic = "lsl"; - form = "'Zd.'tszp, p'u1210/m, 'Zd.'tszp, 'ITriSvep"; - break; - case LSR_z_p_zi: - mnemonic = "lsr"; - break; - default: - break; - } - } - Format(instr, 
mnemonic, form); -} - -void Disassembler::VisitSVEBitwiseShiftByVector_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { - case ASRR_z_p_zz: - mnemonic = "asrr"; - break; - case ASR_z_p_zz: - mnemonic = "asr"; - break; - case LSLR_z_p_zz: - mnemonic = "lslr"; - break; - case LSL_z_p_zz: - mnemonic = "lsl"; - break; - case LSRR_z_p_zz: - mnemonic = "lsrr"; - break; - case LSR_z_p_zz: - mnemonic = "lsr"; - break; - default: - form = "(SVEBitwiseShiftByVector_Predicated)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBitwiseShiftByWideElements_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d"; - - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { - form = "(SVEBitwiseShiftByWideElements_Predicated)"; - } else { - switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { - case ASR_z_p_zw: - mnemonic = "asr"; - break; - case LSL_z_p_zw: - mnemonic = "lsl"; - break; - case LSR_z_p_zw: - mnemonic = "lsr"; - break; - default: - form = "(SVEBitwiseShiftByWideElements_Predicated)"; - break; - } - } - Format(instr, mnemonic, form); -} - -static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) { - VIXL_ASSERT(IsUintN(8 << lane_bytes_log2, value)); - - // Duplicate lane-sized value across double word. - switch (lane_bytes_log2) { - case 0: - value *= 0x0101010101010101; - break; - case 1: - value *= 0x0001000100010001; - break; - case 2: - value *= 0x0000000100000001; - break; - case 3: // Nothing to do - break; - default: - VIXL_UNREACHABLE(); - } - - if ((value & 0xff) == 0) { - // Check for 16-bit patterns. Set least-significant 16 bits, to make tests - // easier; we already checked least-significant byte is zero above. 
- uint64_t generic_value = value | 0xffff; - - // Check 0x00000000_0000pq00 or 0xffffffff_ffffpq00. - if ((generic_value == 0xffff) || (generic_value == UINT64_MAX)) { - return false; - } - - // Check 0x0000pq00_0000pq00 or 0xffffpq00_ffffpq00. - uint64_t rotvalue = RotateRight(value, 32, 64); - if (value == rotvalue) { - generic_value &= 0xffffffff; - if ((generic_value == 0xffff) || (generic_value == UINT32_MAX)) { - return false; - } - } - - // Check 0xpq00pq00_pq00pq00. - rotvalue = RotateRight(value, 16, 64); - if (value == rotvalue) { - return false; - } - } else { - // Check for 8-bit patterns. Set least-significant byte, to make tests - // easier. - uint64_t generic_value = value | 0xff; - - // Check 0x00000000_000000pq or 0xffffffff_ffffffpq. - if ((generic_value == 0xff) || (generic_value == UINT64_MAX)) { - return false; - } - - // Check 0x000000pq_000000pq or 0xffffffpq_ffffffpq. - uint64_t rotvalue = RotateRight(value, 32, 64); - if (value == rotvalue) { - generic_value &= 0xffffffff; - if ((generic_value == 0xff) || (generic_value == UINT32_MAX)) { - return false; - } - } - - // Check 0x00pq00pq_00pq00pq or 0xffpqffpq_ffpqffpq. - rotvalue = RotateRight(value, 16, 64); - if (value == rotvalue) { - generic_value &= 0xffff; - if ((generic_value == 0xff) || (generic_value == UINT16_MAX)) { - return false; - } - } - - // Check 0xpqpqpqpq_pqpqpqpq. - rotvalue = RotateRight(value, 8, 64); - if (value == rotvalue) { - return false; - } - } - return true; -} - -void Disassembler::VisitSVEBroadcastBitmaskImm(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEBroadcastBitmaskImm)"; - - switch (instr->Mask(SVEBroadcastBitmaskImmMask)) { - case DUPM_z_i: { - uint64_t imm = instr->GetSVEImmLogical(); - if (imm != 0) { - int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); - mnemonic = SVEMoveMaskPreferred(imm, lane_size) ? 
"mov" : "dupm"; - form = "'Zd.'tl, 'ITriSvel"; - } - break; - } - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBroadcastFPImm_Unpredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEBroadcastFPImm_Unpredicated)"; - - switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { - case FDUP_z_i: - // The preferred disassembly for fdup is "fmov". - mnemonic = "fmov"; - form = "'Zd.'t, 'IFPSve"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBroadcastGeneralRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEBroadcastGeneralRegister)"; - - switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) { - case DUP_z_r: - // The preferred disassembly for dup is "mov". - mnemonic = "mov"; - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { - form = "'Zd.'t, 'Xns"; - } else { - form = "'Zd.'t, 'Wns"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBroadcastIndexElement(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEBroadcastIndexElement)"; - - switch (instr->Mask(SVEBroadcastIndexElementMask)) { - case DUP_z_zi: { - // The tsz field must not be zero. - int tsz = instr->ExtractBits(20, 16); - if (tsz != 0) { - // The preferred disassembly for dup is "mov". - mnemonic = "mov"; - int imm2 = instr->ExtractBits(23, 22); - if ((CountSetBits(imm2) + CountSetBits(tsz)) == 1) { - // If imm2:tsz has one set bit, the index is zero. This is - // disassembled as a mov from a b/h/s/d/q scalar register. 
- form = "'Zd.'tszx, 'tszx'u0905"; - } else { - form = "'Zd.'tszx, 'Zn.'tszx['IVInsSVEIndex]"; - } - } - break; - } - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBroadcastIntImm_Unpredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEBroadcastIntImm_Unpredicated)"; - - switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) { - case DUP_z_i: - // The encoding of byte-sized lanes with lsl #8 is undefined. - if ((instr->GetSVEVectorFormat() == kFormatVnB) && - (instr->ExtractBit(13) == 1)) - break; - - // The preferred disassembly for dup is "mov". - mnemonic = "mov"; - form = (instr->ExtractBit(13) == 0) ? "'Zd.'t, #'s1205" - : "'Zd.'t, #'s1205, lsl #8"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVECompressActiveElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVECompressActiveElements)"; - - switch (instr->Mask(SVECompressActiveElementsMask)) { - case COMPACT_z_p_z: - // The top bit of size is always set for compact, so 't can only be - // substituted with types S and D. 
- VIXL_ASSERT(instr->ExtractBit(23) == 1); - mnemonic = "compact"; - form = "'Zd.'t, 'Pgl, 'Zn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEConditionallyBroadcastElementToVector( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) { - case CLASTA_z_p_zz: - mnemonic = "clasta"; - break; - case CLASTB_z_p_zz: - mnemonic = "clastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEConditionallyExtractElementToGeneralRegister( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Wd, 'Pgl, 'Wd, 'Zn.'t"; - - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { - form = "'Xd, p'u1210, 'Xd, 'Zn.'t"; - } - - switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) { - case CLASTA_r_p_z: - mnemonic = "clasta"; - break; - case CLASTB_r_p_z: - mnemonic = "clastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEConditionallyExtractElementToSIMDFPScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; - - switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) { - case CLASTA_v_p_z: - mnemonic = "clasta"; - break; - case CLASTB_v_p_z: - mnemonic = "clastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEConditionallyTerminateScalars( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = (instr->ExtractBit(22) == 0) ? 
"'Wn, 'Wm" : "'Xn, 'Xm"; - - switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { - case CTERMEQ_rr: - mnemonic = "ctermeq"; - break; - case CTERMNE_rr: - mnemonic = "ctermne"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEConstructivePrefix_Unpredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEConstructivePrefix_Unpredicated)"; - - switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { - case MOVPRFX_z_z: - mnemonic = "movprfx"; - form = "'Zd, 'Zn"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - - bool rm_is_zr = instr->GetRm() == kZeroRegCode; - - const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; - const char *suffix = NULL; - - switch (instr->Mask(SVEContiguousFirstFaultLoad_ScalarPlusScalarMask)) { - case LDFF1B_z_p_br_u16: - case LDFF1B_z_p_br_u32: - case LDFF1B_z_p_br_u64: - case LDFF1B_z_p_br_u8: - mnemonic = "ldff1b"; - suffix = rm_is_zr ? "]" : ", 'Xm]"; - break; - case LDFF1D_z_p_br_u64: - mnemonic = "ldff1d"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #3]"; - break; - case LDFF1H_z_p_br_u16: - case LDFF1H_z_p_br_u32: - case LDFF1H_z_p_br_u64: - mnemonic = "ldff1h"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; - break; - case LDFF1SB_z_p_br_s16: - case LDFF1SB_z_p_br_s32: - case LDFF1SB_z_p_br_s64: - mnemonic = "ldff1sb"; - suffix = rm_is_zr ? "]" : ", 'Xm]"; - break; - case LDFF1SH_z_p_br_s32: - case LDFF1SH_z_p_br_s64: - mnemonic = "ldff1sh"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; - break; - case LDFF1SW_z_p_br_s64: - mnemonic = "ldff1sw"; - suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]"; - break; - case LDFF1W_z_p_br_u32: - case LDFF1W_z_p_br_u64: - mnemonic = "ldff1w"; - suffix = rm_is_zr ? 
"]" : ", 'Xm, lsl #2]"; - break; - default: - form = "(SVEContiguousFirstFaultLoad_ScalarPlusScalar)"; - break; - } - - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; - const char *suffix = - (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; - - switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) { - case LDNF1B_z_p_bi_u16: - case LDNF1B_z_p_bi_u32: - case LDNF1B_z_p_bi_u64: - case LDNF1B_z_p_bi_u8: - mnemonic = "ldnf1b"; - break; - case LDNF1D_z_p_bi_u64: - mnemonic = "ldnf1d"; - break; - case LDNF1H_z_p_bi_u16: - case LDNF1H_z_p_bi_u32: - case LDNF1H_z_p_bi_u64: - mnemonic = "ldnf1h"; - break; - case LDNF1SB_z_p_bi_s16: - case LDNF1SB_z_p_bi_s32: - case LDNF1SB_z_p_bi_s64: - mnemonic = "ldnf1sb"; - break; - case LDNF1SH_z_p_bi_s32: - case LDNF1SH_z_p_bi_s64: - mnemonic = "ldnf1sh"; - break; - case LDNF1SW_z_p_bi_s64: - mnemonic = "ldnf1sw"; - break; - case LDNF1W_z_p_bi_u32: - case LDNF1W_z_p_bi_u64: - mnemonic = "ldnf1w"; - break; - default: - form = "(SVEContiguousNonFaultLoad_ScalarPlusImm)"; - suffix = NULL; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusImm)"; - - const char *suffix = - (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; - switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) { - case LDNT1B_z_p_bi_contiguous: - mnemonic = "ldnt1b"; - form = "{'Zt.b}, 'Pgl/z, ['Xns"; - break; - case LDNT1D_z_p_bi_contiguous: - mnemonic = "ldnt1d"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - break; - case LDNT1H_z_p_bi_contiguous: - mnemonic = "ldnt1h"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - break; - case LDNT1W_z_p_bi_contiguous: - mnemonic = "ldnt1w"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - break; - default: - suffix = NULL; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusScalar)"; - - switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) { - case LDNT1B_z_p_br_contiguous: - mnemonic = "ldnt1b"; - form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; - break; - case LDNT1D_z_p_br_contiguous: - mnemonic = "ldnt1d"; - form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; - break; - case LDNT1H_z_p_br_contiguous: - mnemonic = "ldnt1h"; - form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; - break; - case LDNT1W_z_p_br_contiguous: - mnemonic = "ldnt1w"; - form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusImm)"; - - const char *suffix = - (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; - switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) { - case STNT1B_z_p_bi_contiguous: - mnemonic = "stnt1b"; - form = "{'Zt.b}, 'Pgl, ['Xns"; - break; - case STNT1D_z_p_bi_contiguous: - mnemonic = "stnt1d"; - form = "{'Zt.d}, 'Pgl, ['Xns"; - break; - case STNT1H_z_p_bi_contiguous: - mnemonic = "stnt1h"; - form = "{'Zt.h}, 'Pgl, ['Xns"; - break; - case STNT1W_z_p_bi_contiguous: - mnemonic = "stnt1w"; - form = "{'Zt.s}, 'Pgl, ['Xns"; - break; - default: - suffix = NULL; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusScalar)"; - - switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) { - case STNT1B_z_p_br_contiguous: - mnemonic = "stnt1b"; - form = "{'Zt.b}, 'Pgl, ['Xns, 'Rm]"; - break; - case STNT1D_z_p_br_contiguous: - mnemonic = "stnt1d"; - form = "{'Zt.d}, 'Pgl, ['Xns, 'Rm, lsl #3]"; - break; - case STNT1H_z_p_br_contiguous: - mnemonic = "stnt1h"; - form = "{'Zt.h}, 'Pgl, ['Xns, 'Rm, lsl #1]"; - break; - case STNT1W_z_p_br_contiguous: - mnemonic = "stnt1w"; - form = "{'Zt.s}, 'Pgl, ['Xns, 'Rm, lsl #2]"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = (instr->ExtractBits(21, 16) != 0) - ? 
"'prefSVEOp, 'Pgl, ['Xns, #'s2116, mul vl]" - : "'prefSVEOp, 'Pgl, ['Xns]"; - - switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) { - case PRFB_i_p_bi_s: - mnemonic = "prfb"; - break; - case PRFD_i_p_bi_s: - mnemonic = "prfd"; - break; - case PRFH_i_p_bi_s: - mnemonic = "prfh"; - break; - case PRFW_i_p_bi_s: - mnemonic = "prfw"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEContiguousPrefetch_ScalarPlusScalar)"; - - if (instr->GetRm() != kZeroRegCode) { - switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) { - case PRFB_i_p_br_s: - mnemonic = "prfb"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm]"; - break; - case PRFD_i_p_br_s: - mnemonic = "prfd"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #3]"; - break; - case PRFH_i_p_br_s: - mnemonic = "prfh"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #1]"; - break; - case PRFW_i_p_br_s: - mnemonic = "prfw"; - form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #2]"; - break; - default: - break; - } - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEContiguousStore_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - - // The 'size' field isn't in the usual place here. 
- const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, #'s1916, mul vl]"; - if (instr->ExtractBits(19, 16) == 0) { - form = "{'Zt.'tls}, 'Pgl, ['Xns]"; - } - - switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) { - case ST1B_z_p_bi: - mnemonic = "st1b"; - break; - case ST1D_z_p_bi: - mnemonic = "st1d"; - break; - case ST1H_z_p_bi: - mnemonic = "st1h"; - break; - case ST1W_z_p_bi: - mnemonic = "st1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEContiguousStore_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - - // The 'size' field isn't in the usual place here. - const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]"; - - switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) { - case ST1B_z_p_br: - mnemonic = "st1b"; - break; - case ST1D_z_p_br: - mnemonic = "st1d"; - break; - case ST1H_z_p_br: - mnemonic = "st1h"; - break; - case ST1W_z_p_br: - mnemonic = "st1w"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVECopyFPImm_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVECopyFPImm_Predicated)"; - - switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { - case FCPY_z_p_i: - // The preferred disassembly for fcpy is "fmov". - mnemonic = "fmov"; - form = "'Zd.'t, 'Pm/m, 'IFPSve"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVECopyGeneralRegisterToVector_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVECopyGeneralRegisterToVector_Predicated)"; - - switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { - case CPY_z_p_r: - // The preferred disassembly for cpy is "mov". 
- mnemonic = "mov"; - form = "'Zd.'t, 'Pgl/m, 'Wns"; - if (instr->GetSVESize() == kXRegSizeInBytesLog2) { - form = "'Zd.'t, 'Pgl/m, 'Xns"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVECopyIntImm_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVECopyIntImm_Predicated)"; - const char *suffix = NULL; - - switch (instr->Mask(SVECopyIntImm_PredicatedMask)) { - case CPY_z_p_i: { - // The preferred disassembly for cpy is "mov". - mnemonic = "mov"; - form = "'Zd.'t, 'Pm/'?14:mz, #'s1205"; - if (instr->ExtractBit(13) != 0) suffix = ", lsl #8"; - break; - } - default: - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVECopySIMDFPScalarRegisterToVector_Predicated)"; - - switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { - case CPY_z_p_v: - // The preferred disassembly for cpy is "mov". 
- mnemonic = "mov"; - form = "'Zd.'t, 'Pgl/m, 'Vnv"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEExtractElementToGeneralRegister( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Wd, 'Pgl, 'Zn.'t"; - - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { - form = "'Xd, p'u1210, 'Zn.'t"; - } - - switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) { - case LASTA_r_p_z: - mnemonic = "lasta"; - break; - case LASTB_r_p_z: - mnemonic = "lastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEExtractElementToSIMDFPScalarRegister( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; - - switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) { - case LASTA_v_p_z: - mnemonic = "lasta"; - break; - case LASTB_v_p_z: - mnemonic = "lastb"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFFRInitialise(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFFRInitialise)"; - - switch (instr->Mask(SVEFFRInitialiseMask)) { - case SETFFR_f: - mnemonic = "setffr"; - form = " "; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFFRWriteFromPredicate(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFFRWriteFromPredicate)"; - - switch (instr->Mask(SVEFFRWriteFromPredicateMask)) { - case WRFFR_f_p: - mnemonic = "wrffr"; - form = "'Pn.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPArithmeticWithImm_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form00 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.0"; - const char *form05 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.5"; - const char *form10 = 
"'Zd.'t, 'Pgl/m, 'Zd.'t, #1.0"; - const char *form20 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #2.0"; - int i1 = instr->ExtractBit(5); - const char *form = i1 ? form10 : form00; - - switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) { - case FADD_z_p_zs: - mnemonic = "fadd"; - form = i1 ? form10 : form05; - break; - case FMAXNM_z_p_zs: - mnemonic = "fmaxnm"; - break; - case FMAX_z_p_zs: - mnemonic = "fmax"; - break; - case FMINNM_z_p_zs: - mnemonic = "fminnm"; - break; - case FMIN_z_p_zs: - mnemonic = "fmin"; - break; - case FMUL_z_p_zs: - mnemonic = "fmul"; - form = i1 ? form20 : form05; - break; - case FSUBR_z_p_zs: - mnemonic = "fsubr"; - form = i1 ? form10 : form05; - break; - case FSUB_z_p_zs: - mnemonic = "fsub"; - form = i1 ? form10 : form05; - break; - default: - form = "(SVEFPArithmeticWithImm_Predicated)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPArithmetic_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { - case FABD_z_p_zz: - mnemonic = "fabd"; - break; - case FADD_z_p_zz: - mnemonic = "fadd"; - break; - case FDIVR_z_p_zz: - mnemonic = "fdivr"; - break; - case FDIV_z_p_zz: - mnemonic = "fdiv"; - break; - case FMAXNM_z_p_zz: - mnemonic = "fmaxnm"; - break; - case FMAX_z_p_zz: - mnemonic = "fmax"; - break; - case FMINNM_z_p_zz: - mnemonic = "fminnm"; - break; - case FMIN_z_p_zz: - mnemonic = "fmin"; - break; - case FMULX_z_p_zz: - mnemonic = "fmulx"; - break; - case FMUL_z_p_zz: - mnemonic = "fmul"; - break; - case FSCALE_z_p_zz: - mnemonic = "fscale"; - break; - case FSUBR_z_p_zz: - mnemonic = "fsubr"; - break; - case FSUB_z_p_zz: - mnemonic = "fsub"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPConvertPrecision(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = 
"(SVEFPConvertPrecision)"; - - switch (instr->Mask(SVEFPConvertPrecisionMask)) { - case FCVT_z_p_z_d2h: - mnemonic = "fcvt"; - form = "'Zd.h, 'Pgl/m, 'Zn.d"; - break; - case FCVT_z_p_z_d2s: - mnemonic = "fcvt"; - form = "'Zd.s, 'Pgl/m, 'Zn.d"; - break; - case FCVT_z_p_z_h2d: - mnemonic = "fcvt"; - form = "'Zd.d, 'Pgl/m, 'Zn.h"; - break; - case FCVT_z_p_z_h2s: - mnemonic = "fcvt"; - form = "'Zd.s, 'Pgl/m, 'Zn.h"; - break; - case FCVT_z_p_z_s2d: - mnemonic = "fcvt"; - form = "'Zd.d, 'Pgl/m, 'Zn.s"; - break; - case FCVT_z_p_z_s2h: - mnemonic = "fcvt"; - form = "'Zd.h, 'Pgl/m, 'Zn.s"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPConvertToInt(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPConvertToInt)"; - - switch (instr->Mask(SVEFPConvertToIntMask)) { - case FCVTZS_z_p_z_d2w: - mnemonic = "fcvtzs"; - form = "'Zd.s, 'Pgl/m, 'Zn.d"; - break; - case FCVTZS_z_p_z_d2x: - mnemonic = "fcvtzs"; - form = "'Zd.d, 'Pgl/m, 'Zn.d"; - break; - case FCVTZS_z_p_z_fp162h: - mnemonic = "fcvtzs"; - form = "'Zd.h, 'Pgl/m, 'Zn.h"; - break; - case FCVTZS_z_p_z_fp162w: - mnemonic = "fcvtzs"; - form = "'Zd.s, 'Pgl/m, 'Zn.h"; - break; - case FCVTZS_z_p_z_fp162x: - mnemonic = "fcvtzs"; - form = "'Zd.d, 'Pgl/m, 'Zn.h"; - break; - case FCVTZS_z_p_z_s2w: - mnemonic = "fcvtzs"; - form = "'Zd.s, 'Pgl/m, 'Zn.s"; - break; - case FCVTZS_z_p_z_s2x: - mnemonic = "fcvtzs"; - form = "'Zd.d, 'Pgl/m, 'Zn.s"; - break; - case FCVTZU_z_p_z_d2w: - mnemonic = "fcvtzu"; - form = "'Zd.s, 'Pgl/m, 'Zn.d"; - break; - case FCVTZU_z_p_z_d2x: - mnemonic = "fcvtzu"; - form = "'Zd.d, 'Pgl/m, 'Zn.d"; - break; - case FCVTZU_z_p_z_fp162h: - mnemonic = "fcvtzu"; - form = "'Zd.h, 'Pgl/m, 'Zn.h"; - break; - case FCVTZU_z_p_z_fp162w: - mnemonic = "fcvtzu"; - form = "'Zd.s, 'Pgl/m, 'Zn.h"; - break; - case FCVTZU_z_p_z_fp162x: - mnemonic = "fcvtzu"; - form = "'Zd.d, 'Pgl/m, 'Zn.h"; - break; - case FCVTZU_z_p_z_s2w: - mnemonic 
= "fcvtzu"; - form = "'Zd.s, 'Pgl/m, 'Zn.s"; - break; - case FCVTZU_z_p_z_s2x: - mnemonic = "fcvtzu"; - form = "'Zd.d, 'Pgl/m, 'Zn.s"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPExponentialAccelerator(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPExponentialAccelerator)"; - - unsigned size = instr->GetSVESize(); - switch (instr->Mask(SVEFPExponentialAcceleratorMask)) { - case FEXPA_z_z: - if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || - (size == kDRegSizeInBytesLog2)) { - mnemonic = "fexpa"; - form = "'Zd.'t, 'Zn.'t"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPRoundToIntegralValue(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; - - switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { - case FRINTA_z_p_z: - mnemonic = "frinta"; - break; - case FRINTI_z_p_z: - mnemonic = "frinti"; - break; - case FRINTM_z_p_z: - mnemonic = "frintm"; - break; - case FRINTN_z_p_z: - mnemonic = "frintn"; - break; - case FRINTP_z_p_z: - mnemonic = "frintp"; - break; - case FRINTX_z_p_z: - mnemonic = "frintx"; - break; - case FRINTZ_z_p_z: - mnemonic = "frintz"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPTrigMulAddCoefficient(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPTrigMulAddCoefficient)"; - - unsigned size = instr->GetSVESize(); - switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { - case FTMAD_z_zzi: - if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || - (size == kDRegSizeInBytesLog2)) { - mnemonic = "ftmad"; - form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPTrigSelectCoefficient(const 
Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPTrigSelectCoefficient)"; - - unsigned size = instr->GetSVESize(); - switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) { - case FTSSEL_z_zz: - if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || - (size == kDRegSizeInBytesLog2)) { - mnemonic = "ftssel"; - form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPUnaryOp(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; - - if (instr->GetSVESize() == kBRegSizeInBytesLog2) { - form = "(SVEFPUnaryOp)"; - } else { - switch (instr->Mask(SVEFPUnaryOpMask)) { - case FRECPX_z_p_z: - mnemonic = "frecpx"; - break; - case FSQRT_z_p_z: - mnemonic = "fsqrt"; - break; - default: - form = "(SVEFPUnaryOp)"; - break; - } - } - Format(instr, mnemonic, form); -} - -static const char *IncDecFormHelper(const Instruction *instr, - const char *reg_pat_mul_form, - const char *reg_pat_form, - const char *reg_form) { - if (instr->ExtractBits(19, 16) == 0) { - if (instr->ExtractBits(9, 5) == SVE_ALL) { - // Use the register only form if the multiplier is one (encoded as zero) - // and the pattern is SVE_ALL. - return reg_form; - } - // Use the register and pattern form if the multiplier is one. 
- return reg_pat_form; - } - return reg_pat_mul_form; -} - -void Disassembler::VisitSVEIncDecRegisterByElementCount( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = - IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); - - switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) { - case DECB_r_rs: - mnemonic = "decb"; - break; - case DECD_r_rs: - mnemonic = "decd"; - break; - case DECH_r_rs: - mnemonic = "dech"; - break; - case DECW_r_rs: - mnemonic = "decw"; - break; - case INCB_r_rs: - mnemonic = "incb"; - break; - case INCD_r_rs: - mnemonic = "incd"; - break; - case INCH_r_rs: - mnemonic = "inch"; - break; - case INCW_r_rs: - mnemonic = "incw"; - break; - default: - form = "(SVEIncDecRegisterByElementCount)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIncDecVectorByElementCount( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = IncDecFormHelper(instr, - "'Zd.'t, 'Ipc, mul #'u1916+1", - "'Zd.'t, 'Ipc", - "'Zd.'t"); - - switch (instr->Mask(SVEIncDecVectorByElementCountMask)) { - case DECD_z_zs: - mnemonic = "decd"; - break; - case DECH_z_zs: - mnemonic = "dech"; - break; - case DECW_z_zs: - mnemonic = "decw"; - break; - case INCD_z_zs: - mnemonic = "incd"; - break; - case INCH_z_zs: - mnemonic = "inch"; - break; - case INCW_z_zs: - mnemonic = "incw"; - break; - default: - form = "(SVEIncDecVectorByElementCount)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEInsertGeneralRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEInsertGeneralRegister)"; - - switch (instr->Mask(SVEInsertGeneralRegisterMask)) { - case INSR_z_r: - mnemonic = "insr"; - if (instr->GetSVESize() == kDRegSizeInBytesLog2) { - form = "'Zd.'t, 'Xn"; - } else { - form = "'Zd.'t, 'Wn"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void 
Disassembler::VisitSVEInsertSIMDFPScalarRegister( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEInsertSIMDFPScalarRegister)"; - - switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) { - case INSR_z_v: - mnemonic = "insr"; - form = "'Zd.'t, 'Vnv"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntAddSubtractImm_Unpredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = (instr->ExtractBit(13) == 0) - ? "'Zd.'t, 'Zd.'t, #'u1205" - : "'Zd.'t, 'Zd.'t, #'u1205, lsl #8"; - - switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { - case ADD_z_zi: - mnemonic = "add"; - break; - case SQADD_z_zi: - mnemonic = "sqadd"; - break; - case SQSUB_z_zi: - mnemonic = "sqsub"; - break; - case SUBR_z_zi: - mnemonic = "subr"; - break; - case SUB_z_zi: - mnemonic = "sub"; - break; - case UQADD_z_zi: - mnemonic = "uqadd"; - break; - case UQSUB_z_zi: - mnemonic = "uqsub"; - break; - default: - form = "(SVEIntAddSubtractImm_Unpredicated)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntAddSubtractVectors_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) { - case ADD_z_p_zz: - mnemonic = "add"; - break; - case SUBR_z_p_zz: - mnemonic = "subr"; - break; - case SUB_z_p_zz: - mnemonic = "sub"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntCompareScalarCountAndLimit( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = - (instr->ExtractBit(12) == 0) ? 
"'Pd.'t, 'Wn, 'Wm" : "'Pd.'t, 'Xn, 'Xm"; - - switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { - case WHILELE_p_p_rr: - mnemonic = "whilele"; - break; - case WHILELO_p_p_rr: - mnemonic = "whilelo"; - break; - case WHILELS_p_p_rr: - mnemonic = "whilels"; - break; - case WHILELT_p_p_rr: - mnemonic = "whilelt"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntConvertToFP(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIntConvertToFP)"; - - switch (instr->Mask(SVEIntConvertToFPMask)) { - case SCVTF_z_p_z_h2fp16: - mnemonic = "scvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.h"; - break; - case SCVTF_z_p_z_w2d: - mnemonic = "scvtf"; - form = "'Zd.d, 'Pgl/m, 'Zn.s"; - break; - case SCVTF_z_p_z_w2fp16: - mnemonic = "scvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.s"; - break; - case SCVTF_z_p_z_w2s: - mnemonic = "scvtf"; - form = "'Zd.s, 'Pgl/m, 'Zn.s"; - break; - case SCVTF_z_p_z_x2d: - mnemonic = "scvtf"; - form = "'Zd.d, 'Pgl/m, 'Zn.d"; - break; - case SCVTF_z_p_z_x2fp16: - mnemonic = "scvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.d"; - break; - case SCVTF_z_p_z_x2s: - mnemonic = "scvtf"; - form = "'Zd.s, 'Pgl/m, 'Zn.d"; - break; - case UCVTF_z_p_z_h2fp16: - mnemonic = "ucvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.h"; - break; - case UCVTF_z_p_z_w2d: - mnemonic = "ucvtf"; - form = "'Zd.d, 'Pgl/m, 'Zn.s"; - break; - case UCVTF_z_p_z_w2fp16: - mnemonic = "ucvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.s"; - break; - case UCVTF_z_p_z_w2s: - mnemonic = "ucvtf"; - form = "'Zd.s, 'Pgl/m, 'Zn.s"; - break; - case UCVTF_z_p_z_x2d: - mnemonic = "ucvtf"; - form = "'Zd.d, 'Pgl/m, 'Zn.d"; - break; - case UCVTF_z_p_z_x2fp16: - mnemonic = "ucvtf"; - form = "'Zd.h, 'Pgl/m, 'Zn.d"; - break; - case UCVTF_z_p_z_x2s: - mnemonic = "ucvtf"; - form = "'Zd.s, 'Pgl/m, 'Zn.d"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntDivideVectors_Predicated( - const Instruction *instr) { - 
const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { - case SDIVR_z_p_zz: - mnemonic = "sdivr"; - break; - case SDIV_z_p_zz: - mnemonic = "sdiv"; - break; - case UDIVR_z_p_zz: - mnemonic = "udivr"; - break; - case UDIV_z_p_zz: - mnemonic = "udiv"; - break; - default: - break; - } - - switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { - case SDIVR_z_p_zz: - case SDIV_z_p_zz: - case UDIVR_z_p_zz: - case UDIV_z_p_zz: - switch (instr->GetSVESize()) { - case kBRegSizeInBytesLog2: - case kHRegSizeInBytesLog2: - mnemonic = "unimplemented"; - form = "(SVEIntBinaryArithmeticPredicated)"; - break; - case kSRegSizeInBytesLog2: - case kDRegSizeInBytesLog2: - // The default form works for these instructions. - break; - default: - // GetSVESize() should never return other values. - VIXL_UNREACHABLE(); - break; - } - } - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntMinMaxDifference_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) { - case SABD_z_p_zz: - mnemonic = "sabd"; - break; - case SMAX_z_p_zz: - mnemonic = "smax"; - break; - case SMIN_z_p_zz: - mnemonic = "smin"; - break; - case UABD_z_p_zz: - mnemonic = "uabd"; - break; - case UMAX_z_p_zz: - mnemonic = "umax"; - break; - case UMIN_z_p_zz: - mnemonic = "umin"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntMinMaxImm_Unpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zd.'t, #'u1205"; - - switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) { - case SMAX_z_zi: - mnemonic = "smax"; - form = "'Zd.'t, 'Zd.'t, #'s1205"; - break; - case SMIN_z_zi: - mnemonic = "smin"; - form = "'Zd.'t, 'Zd.'t, #'s1205"; - break; - case 
UMAX_z_zi: - mnemonic = "umax"; - break; - case UMIN_z_zi: - mnemonic = "umin"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntMulImm_Unpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIntMulImm_Unpredicated)"; - - switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) { - case MUL_z_zi: - mnemonic = "mul"; - form = "'Zd.'t, 'Zd.'t, #'s1205"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntMulVectors_Predicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) { - case MUL_z_p_zz: - mnemonic = "mul"; - break; - case SMULH_z_p_zz: - mnemonic = "smulh"; - break; - case UMULH_z_p_zz: - mnemonic = "umulh"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadAndBroadcastElement)"; - const char *suffix_b = ", #'u2116]"; - const char *suffix_h = ", #'u2116*2]"; - const char *suffix_w = ", #'u2116*4]"; - const char *suffix_d = ", #'u2116*8]"; - const char *suffix = NULL; - - switch (instr->Mask(SVELoadAndBroadcastElementMask)) { - case LD1RB_z_p_bi_u16: - mnemonic = "ld1rb"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RB_z_p_bi_u32: - mnemonic = "ld1rb"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RB_z_p_bi_u64: - mnemonic = "ld1rb"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RB_z_p_bi_u8: - mnemonic = "ld1rb"; - form = "{'Zt.b}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RD_z_p_bi_u64: - mnemonic = "ld1rd"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_d; - break; - case LD1RH_z_p_bi_u16: - mnemonic = 
"ld1rh"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - case LD1RH_z_p_bi_u32: - mnemonic = "ld1rh"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - case LD1RH_z_p_bi_u64: - mnemonic = "ld1rh"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - case LD1RSB_z_p_bi_s16: - mnemonic = "ld1rsb"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RSB_z_p_bi_s32: - mnemonic = "ld1rsb"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RSB_z_p_bi_s64: - mnemonic = "ld1rsb"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_b; - break; - case LD1RSH_z_p_bi_s32: - mnemonic = "ld1rsh"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - case LD1RSH_z_p_bi_s64: - mnemonic = "ld1rsh"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_h; - break; - case LD1RSW_z_p_bi_s64: - mnemonic = "ld1rsw"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_w; - break; - case LD1RW_z_p_bi_u32: - mnemonic = "ld1rw"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - suffix = suffix_w; - break; - case LD1RW_z_p_bi_u64: - mnemonic = "ld1rw"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - suffix = suffix_w; - break; - default: - break; - } - - // Hide curly brackets if immediate is zero. - if (instr->ExtractBits(21, 16) == 0) { - suffix = "]"; - } - - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusImm)"; - - const char *suffix = - (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916*16]"; - - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { - case LD1RQB_z_p_bi_u8: - mnemonic = "ld1rqb"; - form = "{'Zt.b}, 'Pgl/z, ['Xns"; - break; - case LD1RQD_z_p_bi_u64: - mnemonic = "ld1rqd"; - form = "{'Zt.d}, 'Pgl/z, ['Xns"; - break; - case LD1RQH_z_p_bi_u16: - mnemonic = "ld1rqh"; - form = "{'Zt.h}, 'Pgl/z, ['Xns"; - break; - case LD1RQW_z_p_bi_u32: - mnemonic = "ld1rqw"; - form = "{'Zt.s}, 'Pgl/z, ['Xns"; - break; - default: - suffix = NULL; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusScalar)"; - - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { - case LD1RQB_z_p_br_contiguous: - mnemonic = "ld1rqb"; - form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; - break; - case LD1RQD_z_p_br_contiguous: - mnemonic = "ld1rqd"; - form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; - break; - case LD1RQH_z_p_br_contiguous: - mnemonic = "ld1rqh"; - form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; - break; - case LD1RQW_z_p_br_contiguous: - mnemonic = "ld1rqw"; - form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadMultipleStructures_ScalarPlusImm)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl/z, ['Xns'ISveSvl]"; - - switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) { - case LD2B_z_p_bi_contiguous: - mnemonic = "ld2b"; - form = form_2; - break; - case 
LD2D_z_p_bi_contiguous: - mnemonic = "ld2d"; - form = form_2; - break; - case LD2H_z_p_bi_contiguous: - mnemonic = "ld2h"; - form = form_2; - break; - case LD2W_z_p_bi_contiguous: - mnemonic = "ld2w"; - form = form_2; - break; - case LD3B_z_p_bi_contiguous: - mnemonic = "ld3b"; - form = form_3; - break; - case LD3D_z_p_bi_contiguous: - mnemonic = "ld3d"; - form = form_3; - break; - case LD3H_z_p_bi_contiguous: - mnemonic = "ld3h"; - form = form_3; - break; - case LD3W_z_p_bi_contiguous: - mnemonic = "ld3w"; - form = form_3; - break; - case LD4B_z_p_bi_contiguous: - mnemonic = "ld4b"; - form = form_4; - break; - case LD4D_z_p_bi_contiguous: - mnemonic = "ld4d"; - form = form_4; - break; - case LD4H_z_p_bi_contiguous: - mnemonic = "ld4h"; - form = form_4; - break; - case LD4W_z_p_bi_contiguous: - mnemonic = "ld4w"; - form = form_4; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadMultipleStructures_ScalarPlusScalar)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl/z, ['Xns, 'Xm'NSveS]"; - - switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) { - case LD2B_z_p_br_contiguous: - mnemonic = "ld2b"; - form = form_2; - break; - case LD2D_z_p_br_contiguous: - mnemonic = "ld2d"; - form = form_2; - break; - case LD2H_z_p_br_contiguous: - mnemonic = "ld2h"; - form = form_2; - break; - case LD2W_z_p_br_contiguous: - mnemonic = "ld2w"; - form = form_2; - break; - case LD3B_z_p_br_contiguous: - mnemonic = "ld3b"; - form = form_3; - break; - case LD3D_z_p_br_contiguous: - mnemonic = "ld3d"; - form = form_3; - break; - case LD3H_z_p_br_contiguous: - mnemonic = "ld3h"; - 
form = form_3; - break; - case LD3W_z_p_br_contiguous: - mnemonic = "ld3w"; - form = form_3; - break; - case LD4B_z_p_br_contiguous: - mnemonic = "ld4b"; - form = form_4; - break; - case LD4D_z_p_br_contiguous: - mnemonic = "ld4d"; - form = form_4; - break; - case LD4H_z_p_br_contiguous: - mnemonic = "ld4h"; - form = form_4; - break; - case LD4W_z_p_br_contiguous: - mnemonic = "ld4w"; - form = form_4; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVELoadPredicateRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadPredicateRegister)"; - - switch (instr->Mask(SVELoadPredicateRegisterMask)) { - case LDR_p_bi: - mnemonic = "ldr"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Pd, ['Xns]"; - } else { - form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVELoadVectorRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVELoadVectorRegister)"; - - switch (instr->Mask(SVELoadVectorRegisterMask)) { - case LDR_z_bi: - mnemonic = "ldr"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Zd, ['Xns]"; - } else { - form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPartitionBreakCondition(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.b, p'u1310/'?04:mz, 'Pn.b"; - - switch (instr->Mask(SVEPartitionBreakConditionMask)) { - case BRKAS_p_p_p_z: - mnemonic = "brkas"; - break; - case BRKA_p_p_p: - mnemonic = "brka"; - break; - case BRKBS_p_p_p_z: - mnemonic = "brkbs"; - break; - case BRKB_p_p_p: - mnemonic = "brkb"; - break; - default: - form = "(SVEPartitionBreakCondition)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPermutePredicateElements(const Instruction 
*instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pn.'t, 'Pm.'t"; - - switch (instr->Mask(SVEPermutePredicateElementsMask)) { - case TRN1_p_pp: - mnemonic = "trn1"; - break; - case TRN2_p_pp: - mnemonic = "trn2"; - break; - case UZP1_p_pp: - mnemonic = "uzp1"; - break; - case UZP2_p_pp: - mnemonic = "uzp2"; - break; - case ZIP1_p_pp: - mnemonic = "zip1"; - break; - case ZIP2_p_pp: - mnemonic = "zip2"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateFirstActive(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateFirstActive)"; - - switch (instr->Mask(SVEPredicateFirstActiveMask)) { - case PFIRST_p_p_p: - mnemonic = "pfirst"; - form = "'Pd.b, 'Pn, 'Pd.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateReadFromFFR_Unpredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateReadFromFFR_Unpredicated)"; - - switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) { - case RDFFR_p_f: - mnemonic = "rdffr"; - form = "'Pd.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateTest(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateTest)"; - - switch (instr->Mask(SVEPredicateTestMask)) { - case PTEST_p_p: - mnemonic = "ptest"; - form = "p'u1310, 'Pn.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateZero(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateZero)"; - - switch (instr->Mask(SVEPredicateZeroMask)) { - case PFALSE_p: - mnemonic = "pfalse"; - form = "'Pd.b"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void 
Disassembler::VisitSVEPropagateBreakToNextPartition( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b"; - - switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) { - case BRKNS_p_p_pp: - mnemonic = "brkns"; - break; - case BRKN_p_p_pp: - mnemonic = "brkn"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEReversePredicateElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEReversePredicateElements)"; - - switch (instr->Mask(SVEReversePredicateElementsMask)) { - case REV_p_p: - mnemonic = "rev"; - form = "'Pd.'t, 'Pn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEReverseVectorElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEReverseVectorElements)"; - - switch (instr->Mask(SVEReverseVectorElementsMask)) { - case REV_z_z: - mnemonic = "rev"; - form = "'Zd.'t, 'Zn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; - - unsigned size = instr->GetSVESize(); - switch (instr->Mask(SVEReverseWithinElementsMask)) { - case RBIT_z_p_z: - mnemonic = "rbit"; - break; - case REVB_z_z: - if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || - (size == kDRegSizeInBytesLog2)) { - mnemonic = "revb"; - } else { - form = "(SVEReverseWithinElements)"; - } - break; - case REVH_z_z: - if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) { - mnemonic = "revh"; - } else { - form = "(SVEReverseWithinElements)"; - } - break; - case REVW_z_z: - if (size == kDRegSizeInBytesLog2) { - mnemonic = "revw"; - } else { - form = "(SVEReverseWithinElements)"; - } - break; - default: - break; 
- } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = IncDecFormHelper(instr, - "'R20d, 'Ipc, mul #'u1916+1", - "'R20d, 'Ipc", - "'R20d"); - const char *form_sx = IncDecFormHelper(instr, - "'Xd, 'Wd, 'Ipc, mul #'u1916+1", - "'Xd, 'Wd, 'Ipc", - "'Xd, 'Wd"); - - switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) { - case SQDECB_r_rs_sx: - mnemonic = "sqdecb"; - form = form_sx; - break; - case SQDECD_r_rs_sx: - mnemonic = "sqdecd"; - form = form_sx; - break; - case SQDECH_r_rs_sx: - mnemonic = "sqdech"; - form = form_sx; - break; - case SQDECW_r_rs_sx: - mnemonic = "sqdecw"; - form = form_sx; - break; - case SQINCB_r_rs_sx: - mnemonic = "sqincb"; - form = form_sx; - break; - case SQINCD_r_rs_sx: - mnemonic = "sqincd"; - form = form_sx; - break; - case SQINCH_r_rs_sx: - mnemonic = "sqinch"; - form = form_sx; - break; - case SQINCW_r_rs_sx: - mnemonic = "sqincw"; - form = form_sx; - break; - case SQDECB_r_rs_x: - mnemonic = "sqdecb"; - break; - case SQDECD_r_rs_x: - mnemonic = "sqdecd"; - break; - case SQDECH_r_rs_x: - mnemonic = "sqdech"; - break; - case SQDECW_r_rs_x: - mnemonic = "sqdecw"; - break; - case SQINCB_r_rs_x: - mnemonic = "sqincb"; - break; - case SQINCD_r_rs_x: - mnemonic = "sqincd"; - break; - case SQINCH_r_rs_x: - mnemonic = "sqinch"; - break; - case SQINCW_r_rs_x: - mnemonic = "sqincw"; - break; - case UQDECB_r_rs_uw: - case UQDECB_r_rs_x: - mnemonic = "uqdecb"; - break; - case UQDECD_r_rs_uw: - case UQDECD_r_rs_x: - mnemonic = "uqdecd"; - break; - case UQDECH_r_rs_uw: - case UQDECH_r_rs_x: - mnemonic = "uqdech"; - break; - case UQDECW_r_rs_uw: - case UQDECW_r_rs_x: - mnemonic = "uqdecw"; - break; - case UQINCB_r_rs_uw: - case UQINCB_r_rs_x: - mnemonic = "uqincb"; - break; - case UQINCD_r_rs_uw: - case UQINCD_r_rs_x: - mnemonic = "uqincd"; - break; - case UQINCH_r_rs_uw: - case 
UQINCH_r_rs_x: - mnemonic = "uqinch"; - break; - case UQINCW_r_rs_uw: - case UQINCW_r_rs_x: - mnemonic = "uqincw"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVESaturatingIncDecVectorByElementCount( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = IncDecFormHelper(instr, - "'Zd.'t, 'Ipc, mul #'u1916+1", - "'Zd.'t, 'Ipc", - "'Zd.'t"); - - switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) { - case SQDECD_z_zs: - mnemonic = "sqdecd"; - break; - case SQDECH_z_zs: - mnemonic = "sqdech"; - break; - case SQDECW_z_zs: - mnemonic = "sqdecw"; - break; - case SQINCD_z_zs: - mnemonic = "sqincd"; - break; - case SQINCH_z_zs: - mnemonic = "sqinch"; - break; - case SQINCW_z_zs: - mnemonic = "sqincw"; - break; - case UQDECD_z_zs: - mnemonic = "uqdecd"; - break; - case UQDECH_z_zs: - mnemonic = "uqdech"; - break; - case UQDECW_z_zs: - mnemonic = "uqdecw"; - break; - case UQINCD_z_zs: - mnemonic = "uqincd"; - break; - case UQINCH_z_zs: - mnemonic = "uqinch"; - break; - case UQINCW_z_zs: - mnemonic = "uqincw"; - break; - default: - form = "(SVEElementCount)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStoreMultipleStructures_ScalarPlusImm)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl, ['Xns'ISveSvl]"; - - switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) { - case ST2B_z_p_bi_contiguous: - mnemonic = "st2b"; - form = form_2; - break; - case ST2H_z_p_bi_contiguous: - mnemonic = "st2h"; - form = form_2; - break; - case ST2W_z_p_bi_contiguous: - mnemonic = "st2w"; - form = form_2; - break; - 
case ST2D_z_p_bi_contiguous: - mnemonic = "st2d"; - form = form_2; - break; - case ST3B_z_p_bi_contiguous: - mnemonic = "st3b"; - form = form_3; - break; - case ST3H_z_p_bi_contiguous: - mnemonic = "st3h"; - form = form_3; - break; - case ST3W_z_p_bi_contiguous: - mnemonic = "st3w"; - form = form_3; - break; - case ST3D_z_p_bi_contiguous: - mnemonic = "st3d"; - form = form_3; - break; - case ST4B_z_p_bi_contiguous: - mnemonic = "st4b"; - form = form_4; - break; - case ST4H_z_p_bi_contiguous: - mnemonic = "st4h"; - form = form_4; - break; - case ST4W_z_p_bi_contiguous: - mnemonic = "st4w"; - form = form_4; - break; - case ST4D_z_p_bi_contiguous: - mnemonic = "st4d"; - form = form_4; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStoreMultipleStructures_ScalarPlusScalar)"; - - const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]"; - const char *form_3 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]"; - const char *form_4 = - "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " - "'Pgl, ['Xns, 'Xm'NSveS]"; - - switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) { - case ST2B_z_p_br_contiguous: - mnemonic = "st2b"; - form = form_2; - break; - case ST2D_z_p_br_contiguous: - mnemonic = "st2d"; - form = form_2; - break; - case ST2H_z_p_br_contiguous: - mnemonic = "st2h"; - form = form_2; - break; - case ST2W_z_p_br_contiguous: - mnemonic = "st2w"; - form = form_2; - break; - case ST3B_z_p_br_contiguous: - mnemonic = "st3b"; - form = form_3; - break; - case ST3D_z_p_br_contiguous: - mnemonic = "st3d"; - form = form_3; - break; - case ST3H_z_p_br_contiguous: - mnemonic = "st3h"; - form = form_3; - break; - case ST3W_z_p_br_contiguous: - mnemonic = "st3w"; - form = form_3; - break; - case ST4B_z_p_br_contiguous: - mnemonic = "st4b"; - 
form = form_4; - break; - case ST4D_z_p_br_contiguous: - mnemonic = "st4d"; - form = form_4; - break; - case ST4H_z_p_br_contiguous: - mnemonic = "st4h"; - form = form_4; - break; - case ST4W_z_p_br_contiguous: - mnemonic = "st4w"; - form = form_4; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEStorePredicateRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStorePredicateRegister)"; - - switch (instr->Mask(SVEStorePredicateRegisterMask)) { - case STR_p_bi: - mnemonic = "str"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Pd, ['Xns]"; - } else { - form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEStoreVectorRegister(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStoreVectorRegister)"; - - switch (instr->Mask(SVEStoreVectorRegisterMask)) { - case STR_z_bi: - mnemonic = "str"; - if (instr->Mask(0x003f1c00) == 0) { - form = "'Zd, ['Xns]"; - } else { - form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVETableLookup(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVETableLookup)"; - - switch (instr->Mask(SVETableLookupMask)) { - case TBL_z_zz_1: - mnemonic = "tbl"; - form = "'Zd.'t, {'Zn.'t}, 'Zm.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEUnpackPredicateElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.h, 'Pn.b"; - - switch (instr->Mask(SVEUnpackPredicateElementsMask)) { - case PUNPKHI_p_p: - mnemonic = "punpkhi"; - break; - case PUNPKLO_p_p: - mnemonic = "punpklo"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void 
Disassembler::VisitSVEUnpackVectorElements(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'th"; - - if (instr->GetSVESize() == 0) { - // The lowest lane size of the destination vector is H-sized lane. - Format(instr, "unallocated", "(SVEUnpackVectorElements)"); - return; - } - - switch (instr->Mask(SVEUnpackVectorElementsMask)) { - case SUNPKHI_z_z: - mnemonic = "sunpkhi"; - break; - case SUNPKLO_z_z: - mnemonic = "sunpklo"; - break; - case UUNPKHI_z_z: - mnemonic = "uunpkhi"; - break; - case UUNPKLO_z_z: - mnemonic = "uunpklo"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEVectorSplice_Destructive(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEVectorSplice_Destructive)"; - - switch (instr->Mask(SVEVectorSplice_DestructiveMask)) { - case SPLICE_z_p_zz_des: - mnemonic = "splice"; - form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEAddressGeneration(const Instruction *instr) { - const char *mnemonic = "adr"; - const char *form = "'Zd.d, ['Zn.d, 'Zm.d"; - const char *suffix = NULL; - - bool msz_is_zero = (instr->ExtractBits(11, 10) == 0); - - switch (instr->Mask(SVEAddressGenerationMask)) { - case ADR_z_az_d_s32_scaled: - suffix = msz_is_zero ? ", sxtw]" : ", sxtw #'u1110]"; - break; - case ADR_z_az_d_u32_scaled: - suffix = msz_is_zero ? ", uxtw]" : ", uxtw #'u1110]"; - break; - case ADR_z_az_s_same_scaled: - case ADR_z_az_d_same_scaled: - form = "'Zd.'t, ['Zn.'t, 'Zm.'t"; - suffix = msz_is_zero ? 
"]" : ", lsl #'u1110]"; - break; - default: - mnemonic = "unimplemented"; - form = "(SVEAddressGeneration)"; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEBitwiseLogicalUnpredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.d, 'Zn.d, 'Zm.d"; - - switch (instr->Mask(SVEBitwiseLogicalUnpredicatedMask)) { - case AND_z_zz: - mnemonic = "and"; - break; - case BIC_z_zz: - mnemonic = "bic"; - break; - case EOR_z_zz: - mnemonic = "eor"; - break; - case ORR_z_zz: - mnemonic = "orr"; - if (instr->GetRn() == instr->GetRm()) { - mnemonic = "mov"; - form = "'Zd.d, 'Zn.d"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEBitwiseShiftUnpredicated)"; - unsigned tsize = - (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19); - unsigned lane_size = instr->GetSVESize(); - - switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { - case ASR_z_zi: - if (tsize != 0) { - // The tsz field must not be zero. - mnemonic = "asr"; - form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; - } - break; - case ASR_z_zw: - if (lane_size <= kSRegSizeInBytesLog2) { - mnemonic = "asr"; - form = "'Zd.'t, 'Zn.'t, 'Zm.d"; - } - break; - case LSL_z_zi: - if (tsize != 0) { - // The tsz field must not be zero. - mnemonic = "lsl"; - form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSver"; - } - break; - case LSL_z_zw: - if (lane_size <= kSRegSizeInBytesLog2) { - mnemonic = "lsl"; - form = "'Zd.'t, 'Zn.'t, 'Zm.d"; - } - break; - case LSR_z_zi: - if (tsize != 0) { - // The tsz field must not be zero. 
- mnemonic = "lsr"; - form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; - } - break; - case LSR_z_zw: - if (lane_size <= kSRegSizeInBytesLog2) { - mnemonic = "lsr"; - form = "'Zd.'t, 'Zn.'t, 'Zm.d"; - } - break; - default: - break; - } - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEElementCount(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = - IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); - - switch (instr->Mask(SVEElementCountMask)) { - case CNTB_r_s: - mnemonic = "cntb"; - break; - case CNTD_r_s: - mnemonic = "cntd"; - break; - case CNTH_r_s: - mnemonic = "cnth"; - break; - case CNTW_r_s: - mnemonic = "cntw"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPAccumulatingReduction(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPAccumulatingReduction)"; - - switch (instr->Mask(SVEFPAccumulatingReductionMask)) { - case FADDA_v_p_z: - mnemonic = "fadda"; - form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPArithmeticUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { - case FADD_z_zz: - mnemonic = "fadd"; - break; - case FMUL_z_zz: - mnemonic = "fmul"; - break; - case FRECPS_z_zz: - mnemonic = "frecps"; - break; - case FRSQRTS_z_zz: - mnemonic = "frsqrts"; - break; - case FSUB_z_zz: - mnemonic = "fsub"; - break; - case FTSMUL_z_zz: - mnemonic = "ftsmul"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPCompareVectors(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEFPCompareVectorsMask)) { - case 
FACGE_p_p_zz: - mnemonic = "facge"; - break; - case FACGT_p_p_zz: - mnemonic = "facgt"; - break; - case FCMEQ_p_p_zz: - mnemonic = "fcmeq"; - break; - case FCMGE_p_p_zz: - mnemonic = "fcmge"; - break; - case FCMGT_p_p_zz: - mnemonic = "fcmgt"; - break; - case FCMNE_p_p_zz: - mnemonic = "fcmne"; - break; - case FCMUO_p_p_zz: - mnemonic = "fcmuo"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPCompareWithZero(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0"; - - switch (instr->Mask(SVEFPCompareWithZeroMask)) { - case FCMEQ_p_p_z0: - mnemonic = "fcmeq"; - break; - case FCMGE_p_p_z0: - mnemonic = "fcmge"; - break; - case FCMGT_p_p_z0: - mnemonic = "fcmgt"; - break; - case FCMLE_p_p_z0: - mnemonic = "fcmle"; - break; - case FCMLT_p_p_z0: - mnemonic = "fcmlt"; - break; - case FCMNE_p_p_z0: - mnemonic = "fcmne"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPComplexAddition(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPComplexAddition)"; - - switch (instr->Mask(SVEFPComplexAdditionMask)) { - case FCADD_z_p_zz: - mnemonic = "fcadd"; - if (instr->ExtractBit(16) == 0) { - form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #90"; - } else { - form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #270"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPComplexMulAdd(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPComplexMulAdd)"; - const char *suffix = NULL; - - const char *fcmla_constants[] = {"0", "90", "180", "270"}; - - switch (instr->Mask(SVEFPComplexMulAddMask)) { - case FCMLA_z_p_zzz: - mnemonic = "fcmla"; - form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #"; - suffix = fcmla_constants[instr->ExtractBits(14, 13)]; - break; - default: - break; - } - Format(instr, mnemonic, 
form, suffix); -} - -void Disassembler::VisitSVEFPComplexMulAddIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPComplexMulAddIndex)"; - - const char *fcmla_constants[] = {"0", "90", "180", "270"}; - const char *suffix = fcmla_constants[instr->ExtractBits(11, 10)]; - - switch (instr->Mask(SVEFPComplexMulAddIndexMask)) { - case FCMLA_z_zzzi_h: - mnemonic = "fcmla"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #"; - break; - case FCMLA_z_zzzi_s: - mnemonic = "fcmla"; - form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #"; - break; - default: - suffix = NULL; - break; - } - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEFPFastReduction(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; - - switch (instr->Mask(SVEFPFastReductionMask)) { - case FADDV_v_p_z: - mnemonic = "faddv"; - break; - case FMAXNMV_v_p_z: - mnemonic = "fmaxnmv"; - break; - case FMAXV_v_p_z: - mnemonic = "fmaxv"; - break; - case FMINNMV_v_p_z: - mnemonic = "fminnmv"; - break; - case FMINV_v_p_z: - mnemonic = "fminv"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPMulIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPMulIndex)"; - - switch (instr->Mask(SVEFPMulIndexMask)) { - case FMUL_z_zzi_d: - mnemonic = "fmul"; - form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; - break; - case FMUL_z_zzi_h: - case FMUL_z_zzi_h_i3h: - mnemonic = "fmul"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; - break; - case FMUL_z_zzi_s: - mnemonic = "fmul"; - form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPMulAdd(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEFPMulAddMask)) { - case 
FMAD_z_p_zzz: - mnemonic = "fmad"; - break; - case FMLA_z_p_zzz: - mnemonic = "fmla"; - break; - case FMLS_z_p_zzz: - mnemonic = "fmls"; - break; - case FMSB_z_p_zzz: - mnemonic = "fmsb"; - break; - case FNMAD_z_p_zzz: - mnemonic = "fnmad"; - break; - case FNMLA_z_p_zzz: - mnemonic = "fnmla"; - break; - case FNMLS_z_p_zzz: - mnemonic = "fnmls"; - break; - case FNMSB_z_p_zzz: - mnemonic = "fnmsb"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPMulAddIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEFPMulAddIndex)"; - - switch (instr->Mask(SVEFPMulAddIndexMask)) { - case FMLA_z_zzzi_d: - mnemonic = "fmla"; - form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; - break; - case FMLA_z_zzzi_s: - mnemonic = "fmla"; - form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; - break; - case FMLS_z_zzzi_d: - mnemonic = "fmls"; - form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; - break; - case FMLS_z_zzzi_s: - mnemonic = "fmls"; - form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; - break; - case FMLA_z_zzzi_h: - case FMLA_z_zzzi_h_i3h: - mnemonic = "fmla"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; - break; - case FMLS_z_zzzi_h: - case FMLS_z_zzzi_h_i3h: - mnemonic = "fmls"; - form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; - break; - default: - break; - } - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEFPUnaryOpUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t"; - - switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { - case FRECPE_z_z: - mnemonic = "frecpe"; - break; - case FRSQRTE_z_z: - mnemonic = "frsqrte"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIncDecByPredicateCount(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIncDecByPredicateCount)"; - - switch (instr->Mask(SVEIncDecByPredicateCountMask)) { - case 
DECP_r_p_r: - case DECP_z_p_z: - mnemonic = "decp"; - break; - case INCP_r_p_r: - case INCP_z_p_z: - mnemonic = "incp"; - break; - case SQDECP_r_p_r_sx: - case SQDECP_r_p_r_x: - case SQDECP_z_p_z: - mnemonic = "sqdecp"; - break; - case SQINCP_r_p_r_sx: - case SQINCP_r_p_r_x: - case SQINCP_z_p_z: - mnemonic = "sqincp"; - break; - case UQDECP_r_p_r_uw: - case UQDECP_r_p_r_x: - case UQDECP_z_p_z: - mnemonic = "uqdecp"; - break; - case UQINCP_r_p_r_uw: - case UQINCP_r_p_r_x: - case UQINCP_z_p_z: - mnemonic = "uqincp"; - break; - default: - break; - } - - switch (instr->Mask(SVEIncDecByPredicateCountMask)) { - // <Xdn>, <Pg>.<T> - case DECP_r_p_r: - case INCP_r_p_r: - form = "'Xd, 'Pn.'t"; - break; - // <Zdn>.<T>, <Pg> - case DECP_z_p_z: - case INCP_z_p_z: - case SQDECP_z_p_z: - case SQINCP_z_p_z: - case UQDECP_z_p_z: - case UQINCP_z_p_z: - form = "'Zd.'t, 'Pn"; - break; - // <Xdn>, <Pg>.<T>, <Wdn> - case SQDECP_r_p_r_sx: - case SQINCP_r_p_r_sx: - form = "'Xd, 'Pn.'t, 'Wd"; - break; - // <Xdn>, <Pg>.<T> - case SQDECP_r_p_r_x: - case SQINCP_r_p_r_x: - case UQDECP_r_p_r_x: - case UQINCP_r_p_r_x: - form = "'Xd, 'Pn.'t"; - break; - // <Wdn>, <Pg>.<T> - case UQDECP_r_p_r_uw: - case UQINCP_r_p_r_uw: - form = "'Wd, 'Pn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIndexGeneration(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIndexGeneration)"; - - bool w_inputs = - static_cast<unsigned>(instr->GetSVESize()) <= kWRegSizeInBytesLog2; - - switch (instr->Mask(SVEIndexGenerationMask)) { - case INDEX_z_ii: - mnemonic = "index"; - form = "'Zd.'t, #'s0905, #'s2016"; - break; - case INDEX_z_ir: - mnemonic = "index"; - form = w_inputs ? "'Zd.'t, #'s0905, 'Wm" : "'Zd.'t, #'s0905, 'Xm"; - break; - case INDEX_z_ri: - mnemonic = "index"; - form = w_inputs ? "'Zd.'t, 'Wn, #'s2016" : "'Zd.'t, 'Xn, #'s2016"; - break; - case INDEX_z_rr: - mnemonic = "index"; - form = w_inputs ? 
"'Zd.'t, 'Wn, 'Wm" : "'Zd.'t, 'Xn, 'Xm"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntArithmeticUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) { - case ADD_z_zz: - mnemonic = "add"; - break; - case SQADD_z_zz: - mnemonic = "sqadd"; - break; - case SQSUB_z_zz: - mnemonic = "sqsub"; - break; - case SUB_z_zz: - mnemonic = "sub"; - break; - case UQADD_z_zz: - mnemonic = "uqadd"; - break; - case UQSUB_z_zz: - mnemonic = "uqsub"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntCompareSignedImm(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016"; - - switch (instr->Mask(SVEIntCompareSignedImmMask)) { - case CMPEQ_p_p_zi: - mnemonic = "cmpeq"; - break; - case CMPGE_p_p_zi: - mnemonic = "cmpge"; - break; - case CMPGT_p_p_zi: - mnemonic = "cmpgt"; - break; - case CMPLE_p_p_zi: - mnemonic = "cmple"; - break; - case CMPLT_p_p_zi: - mnemonic = "cmplt"; - break; - case CMPNE_p_p_zi: - mnemonic = "cmpne"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntCompareUnsignedImm(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014"; - - switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { - case CMPHI_p_p_zi: - mnemonic = "cmphi"; - break; - case CMPHS_p_p_zi: - mnemonic = "cmphs"; - break; - case CMPLO_p_p_zi: - mnemonic = "cmplo"; - break; - case CMPLS_p_p_zi: - mnemonic = "cmpls"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntCompareVectors(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.d"; - - switch 
(instr->Mask(SVEIntCompareVectorsMask)) { - case CMPEQ_p_p_zw: - mnemonic = "cmpeq"; - break; - case CMPEQ_p_p_zz: - mnemonic = "cmpeq"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPGE_p_p_zw: - mnemonic = "cmpge"; - break; - case CMPGE_p_p_zz: - mnemonic = "cmpge"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPGT_p_p_zw: - mnemonic = "cmpgt"; - break; - case CMPGT_p_p_zz: - mnemonic = "cmpgt"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPHI_p_p_zw: - mnemonic = "cmphi"; - break; - case CMPHI_p_p_zz: - mnemonic = "cmphi"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPHS_p_p_zw: - mnemonic = "cmphs"; - break; - case CMPHS_p_p_zz: - mnemonic = "cmphs"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - case CMPLE_p_p_zw: - mnemonic = "cmple"; - break; - case CMPLO_p_p_zw: - mnemonic = "cmplo"; - break; - case CMPLS_p_p_zw: - mnemonic = "cmpls"; - break; - case CMPLT_p_p_zw: - mnemonic = "cmplt"; - break; - case CMPNE_p_p_zw: - mnemonic = "cmpne"; - break; - case CMPNE_p_p_zz: - mnemonic = "cmpne"; - form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntMulAddPredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEIntMulAddPredicated)"; - - switch (instr->Mask(SVEIntMulAddPredicatedMask)) { - case MAD_z_p_zzz: - mnemonic = "mad"; - form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; - break; - case MLA_z_p_zzz: - mnemonic = "mla"; - form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; - break; - case MLS_z_p_zzz: - mnemonic = "mls"; - form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; - break; - case MSB_z_p_zzz: - mnemonic = "msb"; - form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntMulAddUnpredicated(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = 
"(SVEIntMulAddUnpredicated)"; - - if (static_cast<unsigned>(instr->GetSVESize()) >= kSRegSizeInBytesLog2) { - form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq"; - switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { - case SDOT_z_zzz: - mnemonic = "sdot"; - break; - case UDOT_z_zzz: - mnemonic = "udot"; - break; - default: - break; - } - } - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEMovprfx(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEMovprfx)"; - - if (instr->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z) { - mnemonic = "movprfx"; - form = "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t"; - } - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntReduction(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Vdv, 'Pgl, 'Zn.'t"; - - if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) { - switch (instr->Mask(SVEIntReductionLogicalMask)) { - case ANDV_r_p_z: - mnemonic = "andv"; - break; - case EORV_r_p_z: - mnemonic = "eorv"; - break; - case ORV_r_p_z: - mnemonic = "orv"; - break; - default: - break; - } - } else { - switch (instr->Mask(SVEIntReductionMask)) { - case SADDV_r_p_z: - mnemonic = "saddv"; - form = "'Dd, 'Pgl, 'Zn.'t"; - break; - case SMAXV_r_p_z: - mnemonic = "smaxv"; - break; - case SMINV_r_p_z: - mnemonic = "sminv"; - break; - case UADDV_r_p_z: - mnemonic = "uaddv"; - form = "'Dd, 'Pgl, 'Zn.'t"; - break; - case UMAXV_r_p_z: - mnemonic = "umaxv"; - break; - case UMINV_r_p_z: - mnemonic = "uminv"; - break; - default: - break; - } - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEIntUnaryArithmeticPredicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; - - switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) { - case ABS_z_p_z: - mnemonic = "abs"; - break; - case CLS_z_p_z: - mnemonic = "cls"; - break; - case CLZ_z_p_z: - mnemonic = "clz"; - 
break; - case CNOT_z_p_z: - mnemonic = "cnot"; - break; - case CNT_z_p_z: - mnemonic = "cnt"; - break; - case FABS_z_p_z: - mnemonic = "fabs"; - break; - case FNEG_z_p_z: - mnemonic = "fneg"; - break; - case NEG_z_p_z: - mnemonic = "neg"; - break; - case NOT_z_p_z: - mnemonic = "not"; - break; - case SXTB_z_p_z: - mnemonic = "sxtb"; - break; - case SXTH_z_p_z: - mnemonic = "sxth"; - break; - case SXTW_z_p_z: - mnemonic = "sxtw"; - break; - case UXTB_z_p_z: - mnemonic = "uxtb"; - break; - case UXTH_z_p_z: - mnemonic = "uxth"; - break; - case UXTW_z_p_z: - mnemonic = "uxtw"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEMulIndex(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEMulIndex)"; - - switch (instr->Mask(SVEMulIndexMask)) { - case SDOT_z_zzzi_d: - mnemonic = "sdot"; - form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; - break; - case SDOT_z_zzzi_s: - mnemonic = "sdot"; - form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; - break; - case UDOT_z_zzzi_d: - mnemonic = "udot"; - form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; - break; - case UDOT_z_zzzi_s: - mnemonic = "udot"; - form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPermuteVectorExtract(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPermuteVectorExtract)"; - - switch (instr->Mask(SVEPermuteVectorExtractMask)) { - case EXT_z_zi_des: - mnemonic = "ext"; - form = "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPermuteVectorInterleaving(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; - - switch (instr->Mask(SVEPermuteVectorInterleavingMask)) { - case TRN1_z_zz: - mnemonic = "trn1"; - break; - case TRN2_z_zz: - mnemonic = "trn2"; - break; - 
case UZP1_z_zz: - mnemonic = "uzp1"; - break; - case UZP2_z_zz: - mnemonic = "uzp2"; - break; - case ZIP1_z_zz: - mnemonic = "zip1"; - break; - case ZIP2_z_zz: - mnemonic = "zip2"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateCount(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateCount)"; - - switch (instr->Mask(SVEPredicateCountMask)) { - case CNTP_r_p_p: - mnemonic = "cntp"; - form = "'Xd, p'u1310, 'Pn.'t"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; - - int pd = instr->GetPd(); - int pn = instr->GetPn(); - int pm = instr->GetPm(); - int pg = instr->ExtractBits(13, 10); - - switch (instr->Mask(SVEPredicateLogicalMask)) { - case ANDS_p_p_pp_z: - mnemonic = "ands"; - if (pn == pm) { - mnemonic = "movs"; - form = "'Pd.b, p'u1310/z, 'Pn.b"; - } - break; - case AND_p_p_pp_z: - mnemonic = "and"; - if (pn == pm) { - mnemonic = "mov"; - form = "'Pd.b, p'u1310/z, 'Pn.b"; - } - break; - case BICS_p_p_pp_z: - mnemonic = "bics"; - break; - case BIC_p_p_pp_z: - mnemonic = "bic"; - break; - case EORS_p_p_pp_z: - mnemonic = "eors"; - if (pm == pg) { - mnemonic = "nots"; - form = "'Pd.b, 'Pm/z, 'Pn.b"; - } - break; - case EOR_p_p_pp_z: - mnemonic = "eor"; - if (pm == pg) { - mnemonic = "not"; - form = "'Pd.b, 'Pm/z, 'Pn.b"; - } - break; - case NANDS_p_p_pp_z: - mnemonic = "nands"; - break; - case NAND_p_p_pp_z: - mnemonic = "nand"; - break; - case NORS_p_p_pp_z: - mnemonic = "nors"; - break; - case NOR_p_p_pp_z: - mnemonic = "nor"; - break; - case ORNS_p_p_pp_z: - mnemonic = "orns"; - break; - case ORN_p_p_pp_z: - mnemonic = "orn"; - break; - case ORRS_p_p_pp_z: - mnemonic = "orrs"; - if ((pn == pm) && (pn == pg)) { - mnemonic = "movs"; - form = "'Pd.b, 'Pn.b"; - } - 
break; - case ORR_p_p_pp_z: - mnemonic = "orr"; - if ((pn == pm) && (pn == pg)) { - mnemonic = "mov"; - form = "'Pd.b, 'Pn.b"; - } - break; - case SEL_p_p_pp: - if (pd == pm) { - mnemonic = "mov"; - form = "'Pd.b, p'u1310/m, 'Pn.b"; - } else { - mnemonic = "sel"; - form = "'Pd.b, p'u1310, 'Pn.b, 'Pm.b"; - } - break; - default: - form = "(SVEPredicateLogical)"; - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateInitialize(const Instruction *instr) { - // This group only contains PTRUE{S}, and there are no unallocated encodings. - VIXL_STATIC_ASSERT( - SVEPredicateInitializeMask == - (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit)); - VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) || - (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s)); - - const char *mnemonic = instr->ExtractBit(16) ? "ptrues" : "ptrue"; - const char *form = "'Pd.'t, 'Ipc"; - // Omit the pattern if it is the default ('ALL'). - if (instr->ExtractBits(9, 5) == SVE_ALL) form = "'Pd.'t"; - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPredicateNextActive(const Instruction *instr) { - // This group only contains PNEXT, and there are no unallocated encodings. - VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask); - VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p); - - Format(instr, "pnext", "'Pd.'t, 'Pn, 'Pd.'t"); -} - -void Disassembler::VisitSVEPredicateReadFromFFR_Predicated( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEPredicateReadFromFFR_Predicated)"; - switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) { - case RDFFR_p_p_f: - case RDFFRS_p_p_f: - mnemonic = instr->ExtractBit(22) ? 
"rdffrs" : "rdffr"; - form = "'Pd.b, 'Pn/z"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEPropagateBreak(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; - - switch (instr->Mask(SVEPropagateBreakMask)) { - case BRKPAS_p_p_pp: - mnemonic = "brkpas"; - break; - case BRKPA_p_p_pp: - mnemonic = "brkpa"; - break; - case BRKPBS_p_p_pp: - mnemonic = "brkpbs"; - break; - case BRKPB_p_p_pp: - mnemonic = "brkpb"; - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEStackFrameAdjustment(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "'Xds, 'Xms, #'s1005"; - - switch (instr->Mask(SVEStackFrameAdjustmentMask)) { - case ADDPL_r_ri: - mnemonic = "addpl"; - break; - case ADDVL_r_ri: - mnemonic = "addvl"; - break; - default: - form = "(SVEStackFrameAdjustment)"; - break; - } - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEStackFrameSize(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEStackFrameSize)"; - - switch (instr->Mask(SVEStackFrameSizeMask)) { - case RDVL_r_i: - mnemonic = "rdvl"; - form = "'Xd, #'s1005"; - break; - default: - break; - } - - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEVectorSelect(const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "(SVEVectorSelect)"; - - switch (instr->Mask(SVEVectorSelectMask)) { - case SEL_z_p_zz: - if (instr->GetRd() == instr->GetRm()) { - mnemonic = "mov"; - form = "'Zd.'t, p'u1310/m, 'Zn.'t"; - } else { - mnemonic = "sel"; - form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t"; - } - break; - default: - break; - } - Format(instr, mnemonic, form); -} - -void Disassembler::VisitSVEContiguousLoad_ScalarPlusImm( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = 
"{'Zt.'tlss}, 'Pgl/z, ['Xns"; - const char *suffix = - (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; - - switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) { - case LD1B_z_p_bi_u16: - case LD1B_z_p_bi_u32: - case LD1B_z_p_bi_u64: - case LD1B_z_p_bi_u8: - mnemonic = "ld1b"; - break; - case LD1D_z_p_bi_u64: - mnemonic = "ld1d"; - break; - case LD1H_z_p_bi_u16: - case LD1H_z_p_bi_u32: - case LD1H_z_p_bi_u64: - mnemonic = "ld1h"; - break; - case LD1SB_z_p_bi_s16: - case LD1SB_z_p_bi_s32: - case LD1SB_z_p_bi_s64: - mnemonic = "ld1sb"; - break; - case LD1SH_z_p_bi_s32: - case LD1SH_z_p_bi_s64: - mnemonic = "ld1sh"; - break; - case LD1SW_z_p_bi_s64: - mnemonic = "ld1sw"; - break; - case LD1W_z_p_bi_u32: - case LD1W_z_p_bi_u64: - mnemonic = "ld1w"; - break; - default: - form = "(SVEContiguousLoad_ScalarPlusImm)"; - suffix = NULL; - break; - } - - Format(instr, mnemonic, form, suffix); -} - -void Disassembler::VisitSVEContiguousLoad_ScalarPlusScalar( - const Instruction *instr) { - const char *mnemonic = "unimplemented"; - const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns, 'Xm"; - const char *suffix = NULL; - - switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { - case LD1B_z_p_br_u16: - case LD1B_z_p_br_u32: - case LD1B_z_p_br_u64: - case LD1B_z_p_br_u8: - mnemonic = "ld1b"; - suffix = "]"; - break; - case LD1D_z_p_br_u64: - mnemonic = "ld1d"; - suffix = ", lsl #'u2423]"; - break; - case LD1H_z_p_br_u16: - case LD1H_z_p_br_u32: - case LD1H_z_p_br_u64: - mnemonic = "ld1h"; - suffix = ", lsl #'u2423]"; - break; - case LD1SB_z_p_br_s16: - case LD1SB_z_p_br_s32: - case LD1SB_z_p_br_s64: - mnemonic = "ld1sb"; - suffix = "]"; - break; - case LD1SH_z_p_br_s32: - case LD1SH_z_p_br_s64: - mnemonic = "ld1sh"; - suffix = ", lsl #1]"; - break; - case LD1SW_z_p_br_s64: - mnemonic = "ld1sw"; - suffix = ", lsl #2]"; - break; - case LD1W_z_p_br_u32: - case LD1W_z_p_br_u64: - mnemonic = "ld1w"; - suffix = ", lsl #'u2423]"; - break; - default: - form = 
"(SVEContiguousLoad_ScalarPlusScalar)"; - suffix = NULL; - break; - } - - Format(instr, mnemonic, form, suffix); -} void Disassembler::VisitReserved(const Instruction *instr) { // UDF is the only instruction in this group, and the Decoder is precise. @@ -9633,18 +5059,14 @@ int64_t Disassembler::CodeRelativeAddress(const void *addr) { void Disassembler::Format(const Instruction *instr, const char *mnemonic, - const char *format0, - const char *format1) { + const char *format) { VIXL_ASSERT(mnemonic != NULL); ResetOutput(); Substitute(instr, mnemonic); - if (format0 != NULL) { + if (format != NULL) { VIXL_ASSERT(buffer_pos_ < buffer_size_); buffer_[buffer_pos_++] = ' '; - Substitute(instr, format0); - if (format1 != NULL) { - Substitute(instr, format1); - } + Substitute(instr, format); } VIXL_ASSERT(buffer_pos_ < buffer_size_); buffer_[buffer_pos_] = 0; @@ -9669,11 +5091,10 @@ void Disassembler::Substitute(const Instruction *instr, const char *string) { int Disassembler::SubstituteField(const Instruction *instr, const char *format) { switch (format[0]) { - // NB. The remaining substitution prefix upper-case characters are: JU. - case 'R': // Register. X or W, selected by sf (or alternative) bit. + // NB. The remaining substitution prefix characters are: GJKUZ. + case 'R': // Register. X or W, selected by sf bit. case 'F': // FP register. S or D, selected by type field. case 'V': // Vector register, V, vector format. - case 'Z': // Scalable vector register. 
case 'W': case 'X': case 'B': @@ -9682,14 +5103,14 @@ int Disassembler::SubstituteField(const Instruction *instr, case 'D': case 'Q': return SubstituteRegisterField(instr, format); - case 'P': - return SubstitutePredicateRegisterField(instr, format); case 'I': return SubstituteImmediateField(instr, format); case 'L': return SubstituteLiteralField(instr, format); case 'N': return SubstituteShiftField(instr, format); + case 'P': + return SubstitutePrefetchField(instr, format); case 'C': return SubstituteConditionField(instr, format); case 'E': @@ -9706,15 +5127,6 @@ int Disassembler::SubstituteField(const Instruction *instr, return SubstituteCrField(instr, format); case 'G': return SubstituteSysOpField(instr, format); - case 'p': - return SubstitutePrefetchField(instr, format); - case 'u': - case 's': - return SubstituteIntField(instr, format); - case 't': - return SubstituteSVESize(instr, format); - case '?': - return SubstituteTernary(instr, format); default: { VIXL_UNREACHABLE(); return 1; @@ -9722,20 +5134,55 @@ int Disassembler::SubstituteField(const Instruction *instr, } } -std::pair<unsigned, unsigned> Disassembler::GetRegNumForField( - const Instruction *instr, char reg_prefix, const char *field) { - unsigned reg_num = UINT_MAX; - unsigned field_len = 1; - switch (field[0]) { +int Disassembler::SubstituteRegisterField(const Instruction *instr, + const char *format) { + char reg_prefix = format[0]; + unsigned reg_num = 0; + unsigned field_len = 2; + + switch (format[1]) { case 'd': reg_num = instr->GetRd(); + if (format[2] == 'q') { + reg_prefix = instr->GetNEONQ() ? 'X' : 'W'; + field_len = 3; + } break; case 'n': reg_num = instr->GetRn(); break; case 'm': reg_num = instr->GetRm(); + switch (format[2]) { + // Handle registers tagged with b (bytes), z (instruction), or + // r (registers), used for address updates in + // NEON load/store instructions. 
+ case 'r': + case 'b': + case 'z': { + field_len = 3; + char *eimm; + int imm = static_cast<int>(strtol(&format[3], &eimm, 10)); + field_len += eimm - &format[3]; + if (reg_num == 31) { + switch (format[2]) { + case 'z': + imm *= (1 << instr->GetNEONLSSize()); + break; + case 'r': + imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes + : kQRegSizeInBytes; + break; + case 'b': + break; + } + AppendToOutput("#%d", imm); + return field_len; + } + break; + } + } break; case 'e': // This is register Rm, but using a 4-bit specifier. Used in NEON @@ -9750,121 +5197,72 @@ std::pair<unsigned, unsigned> Disassembler::GetRegNumForField( break; case 't': reg_num = instr->GetRt(); - break; - default: - VIXL_UNREACHABLE(); - } - - switch (field[1]) { - case '2': - case '3': - case '4': - if ((reg_prefix == 'V') || (reg_prefix == 'Z')) { // Vt2/3/4, Zt2/3/4 - VIXL_ASSERT(field[0] == 't'); - reg_num = (reg_num + field[1] - '1') % 32; - field_len++; + if (format[0] == 'V') { + if ((format[2] >= '2') && (format[2] <= '4')) { + // Handle consecutive vector register specifiers Vt2, Vt3 and Vt4. + reg_num = (reg_num + format[2] - '1') % 32; + field_len = 3; + } } else { - VIXL_ASSERT((field[0] == 't') && (field[1] == '2')); - reg_num = instr->GetRt2(); - field_len++; + if (format[2] == '2') { + // Handle register specifier Rt2. + reg_num = instr->GetRt2(); + field_len = 3; + } } break; - case '+': // Rt+, Rs+ (ie. Rt + 1, Rs + 1) - VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X')); - VIXL_ASSERT((field[0] == 's') || (field[0] == 't')); - reg_num++; - field_len++; - break; - case 's': // Core registers that are (w)sp rather than zr. - VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X')); - reg_num = (reg_num == kZeroRegCode) ? 
kSPRegInternalCode : reg_num; - field_len++; - break; - } - - VIXL_ASSERT(reg_num != UINT_MAX); - return std::make_pair(reg_num, field_len); -} - -int Disassembler::SubstituteRegisterField(const Instruction *instr, - const char *format) { - unsigned field_len = 1; // Initially, count only the first character. - - // The first character of the register format field, eg R, X, S, etc. - char reg_prefix = format[0]; + case '(': { + switch (format[2]) { + case 's': + reg_num = instr->GetRs(); + break; + case 't': + reg_num = instr->GetRt(); + break; + default: + VIXL_UNREACHABLE(); + } - // Pointer to the character after the prefix. This may be one of the standard - // symbols representing a register encoding, or a two digit bit position, - // handled by the following code. - const char *reg_field = &format[1]; - - if (reg_prefix == 'R') { - bool is_x = instr->GetSixtyFourBits(); - if (strspn(reg_field, "0123456789") == 2) { // r20d, r31n, etc. - // Core W or X registers where the type is determined by a specified bit - // position, eg. 'R20d, 'R05n. This is like the 'Rd syntax, where bit 31 - // is implicitly used to select between W and X. - int bitpos = ((reg_field[0] - '0') * 10) + (reg_field[1] - '0'); - VIXL_ASSERT(bitpos <= 31); - is_x = (instr->ExtractBit(bitpos) == 1); - reg_field = &format[3]; - field_len += 2; + VIXL_ASSERT(format[3] == '+'); + int i = 4; + int addition = 0; + while (format[i] != ')') { + VIXL_ASSERT((format[i] >= '0') && (format[i] <= '9')); + addition *= 10; + addition += format[i] - '0'; + ++i; + } + reg_num += addition; + field_len = i + 1; + break; } - reg_prefix = is_x ? 
'X' : 'W'; + default: + VIXL_UNREACHABLE(); } - std::pair<unsigned, unsigned> rn = - GetRegNumForField(instr, reg_prefix, reg_field); - unsigned reg_num = rn.first; - field_len += rn.second; - - if (reg_field[0] == 'm') { - switch (reg_field[1]) { - // Handle registers tagged with b (bytes), z (instruction), or - // r (registers), used for address updates in NEON load/store - // instructions. - case 'r': - case 'b': - case 'z': { - VIXL_ASSERT(reg_prefix == 'X'); - field_len = 3; - char *eimm; - int imm = static_cast<int>(strtol(®_field[2], &eimm, 10)); - field_len += eimm - ®_field[2]; - if (reg_num == 31) { - switch (reg_field[1]) { - case 'z': - imm *= (1 << instr->GetNEONLSSize()); - break; - case 'r': - imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes - : kQRegSizeInBytes; - break; - case 'b': - break; - } - AppendToOutput("#%d", imm); - return field_len; - } - break; - } - } + // Increase field length for registers tagged as stack. + if (format[1] != '(' && format[2] == 's') { + field_len = 3; } CPURegister::RegisterType reg_type = CPURegister::kRegister; unsigned reg_size = kXRegSize; - if (reg_prefix == 'F') { - switch (instr->GetFPType()) { - case 3: - reg_prefix = 'H'; - break; - case 0: - reg_prefix = 'S'; - break; - default: - reg_prefix = 'D'; - } + switch (reg_prefix) { + case 'R': + reg_prefix = instr->GetSixtyFourBits() ? 
'X' : 'W'; + break; + case 'F': + switch (instr->GetFPType()) { + case 3: + reg_prefix = 'H'; + break; + case 0: + reg_prefix = 'S'; + break; + default: + reg_prefix = 'D'; + } } switch (reg_prefix) { @@ -9897,51 +5295,22 @@ int Disassembler::SubstituteRegisterField(const Instruction *instr, reg_size = kQRegSize; break; case 'V': - if (reg_field[1] == 'v') { - reg_type = CPURegister::kVRegister; - reg_size = 1 << (instr->GetSVESize() + 3); - field_len++; - break; - } AppendToOutput("v%d", reg_num); return field_len; - case 'Z': - AppendToOutput("z%d", reg_num); - return field_len; default: VIXL_UNREACHABLE(); } + if ((reg_type == CPURegister::kRegister) && (reg_num == kZeroRegCode) && + (format[2] == 's')) { + reg_num = kSPRegInternalCode; + } + AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type)); return field_len; } -int Disassembler::SubstitutePredicateRegisterField(const Instruction *instr, - const char *format) { - VIXL_ASSERT(format[0] == 'P'); - switch (format[1]) { - // This field only supports P register that are always encoded in the same - // position. - case 'd': - case 't': - AppendToOutput("p%u", instr->GetPt()); - break; - case 'n': - AppendToOutput("p%u", instr->GetPn()); - break; - case 'm': - AppendToOutput("p%u", instr->GetPm()); - break; - case 'g': - VIXL_ASSERT(format[2] == 'l'); - AppendToOutput("p%u", instr->GetPgLow8()); - return 3; - default: - VIXL_UNREACHABLE(); - } - return 2; -} int Disassembler::SubstituteImmediateField(const Instruction *instr, const char *format) { @@ -10022,92 +5391,36 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, return 6; } case 'A': { // IAddSub. - int64_t imm = instr->GetImmAddSub() << (12 * instr->GetImmAddSubShift()); + VIXL_ASSERT(instr->GetShiftAddSub() <= 1); + int64_t imm = instr->GetImmAddSub() << (12 * instr->GetShiftAddSub()); AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm); return 7; } - case 'F': { // IFP, IFPNeon, IFPSve or IFPFBits. 
- int imm8 = 0; - int len = strlen("IFP"); - switch (format[3]) { - case 'F': - VIXL_ASSERT(strncmp(format, "IFPFBits", strlen("IFPFBits")) == 0); - AppendToOutput("#%" PRId32, 64 - instr->GetFPScale()); - return strlen("IFPFBits"); - case 'N': - VIXL_ASSERT(strncmp(format, "IFPNeon", strlen("IFPNeon")) == 0); - imm8 = instr->GetImmNEONabcdefgh(); - len += strlen("Neon"); - break; - case 'S': - VIXL_ASSERT(strncmp(format, "IFPSve", strlen("IFPSve")) == 0); - imm8 = instr->ExtractBits(12, 5); - len += strlen("Sve"); - break; - default: - VIXL_ASSERT(strncmp(format, "IFP", strlen("IFP")) == 0); - imm8 = instr->GetImmFP(); - break; + case 'F': { // IFPHalf, IFPSingle, IFPDouble, or IFPFBits. + if (format[3] == 'F') { // IFPFbits. + AppendToOutput("#%" PRId32, 64 - instr->GetFPScale()); + return 8; + } else { + AppendToOutput("#0x%" PRIx32 " (%.4f)", + instr->GetImmFP(), + format[3] == 'H' + ? FPToFloat(instr->GetImmFP16(), kIgnoreDefaultNaN) + : (format[3] == 'S') ? instr->GetImmFP32() + : instr->GetImmFP64()); + if (format[3] == 'H') { + return 7; + } else { + return 9; + } } - AppendToOutput("#0x%" PRIx32 " (%.4f)", - imm8, - Instruction::Imm8ToFP32(imm8)); - return len; } case 'H': { // IH - ImmHint AppendToOutput("#%" PRId32, instr->GetImmHint()); return 2; } case 'T': { // ITri - Immediate Triangular Encoded. - if (format[4] == 'S') { - VIXL_ASSERT((format[5] == 'v') && (format[6] == 'e')); - switch (format[7]) { - case 'l': - // SVE logical immediate encoding. - AppendToOutput("#0x%" PRIx64, instr->GetSVEImmLogical()); - return 8; - case 'p': { - // SVE predicated shift immediate encoding, lsl. - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2( - /* is_predicated = */ true); - int lane_bits = 8 << shift_and_lane_size.second; - AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); - return 8; - } - case 'q': { - // SVE predicated shift immediate encoding, asr and lsr. 
- std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2( - /* is_predicated = */ true); - AppendToOutput("#%" PRId32, shift_and_lane_size.first); - return 8; - } - case 'r': { - // SVE unpredicated shift immediate encoding, lsl. - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2( - /* is_predicated = */ false); - int lane_bits = 8 << shift_and_lane_size.second; - AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); - return 8; - } - case 's': { - // SVE unpredicated shift immediate encoding, asr and lsr. - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2( - /* is_predicated = */ false); - AppendToOutput("#%" PRId32, shift_and_lane_size.first); - return 8; - } - default: - VIXL_UNREACHABLE(); - return 0; - } - } else { - AppendToOutput("#0x%" PRIx64, instr->GetImmLogical()); - return 4; - } + AppendToOutput("#0x%" PRIx64, instr->GetImmLogical()); + return 4; } case 'N': { // INzcv. int nzcv = (instr->GetNzcv() << Flags_offset); @@ -10129,21 +5442,12 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, AppendToOutput("#%" PRId32, instr->GetImmS()); return 8; } - case 't': { // It - Test and branch bit. + case 'S': { // IS - Test and branch bit. AppendToOutput("#%" PRId32, (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40()); return 2; } - case 'S': { // ISveSvl - SVE 'mul vl' immediate for structured ld/st. - VIXL_ASSERT(strncmp(format, "ISveSvl", 7) == 0); - int imm = instr->ExtractSignedBits(19, 16); - if (imm != 0) { - int reg_count = instr->ExtractBits(22, 21) + 1; - AppendToOutput(", #%d, mul vl", imm * reg_count); - } - return 7; - } case 's': { // Is - Shift (immediate). switch (format[2]) { case '1': { // Is1 - SSHR. 
@@ -10235,13 +5539,6 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } } return 0; - } else if (strncmp(format, - "IVInsSVEIndex", - strlen("IVInsSVEIndex")) == 0) { - std::pair<int, int> index_and_lane_size = - instr->GetSVEPermuteIndexAndLaneSizeLog2(); - AppendToOutput("%d", index_and_lane_size.first); - return strlen("IVInsSVEIndex"); } VIXL_FALLTHROUGH(); } @@ -10250,7 +5547,27 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, return 9; } case 'M': { // Modified Immediate cases. - if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { + if (strncmp(format, "IVMIImmFPHalf", strlen("IVMIImmFPHalf")) == 0) { + AppendToOutput("#0x%" PRIx32 " (%.4f)", + instr->GetImmNEONabcdefgh(), + FPToFloat(instr->GetImmNEONFP16(), + kIgnoreDefaultNaN)); + return strlen("IVMIImmFPHalf"); + } else if (strncmp(format, + "IVMIImmFPSingle", + strlen("IVMIImmFPSingle")) == 0) { + AppendToOutput("#0x%" PRIx32 " (%.4f)", + instr->GetImmNEONabcdefgh(), + instr->GetImmNEONFP32()); + return strlen("IVMIImmFPSingle"); + } else if (strncmp(format, + "IVMIImmFPDouble", + strlen("IVMIImmFPDouble")) == 0) { + AppendToOutput("#0x%" PRIx32 " (%.4f)", + instr->GetImmNEONabcdefgh(), + instr->GetImmNEONFP64()); + return strlen("IVMIImmFPDouble"); + } else if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { uint64_t imm8 = instr->GetImmNEONabcdefgh(); AppendToOutput("#0x%" PRIx64, imm8); return strlen("IVMIImm8"); @@ -10330,48 +5647,6 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } } } - case 'p': { // Ipc - SVE predicate constraint specifier. - VIXL_ASSERT(format[2] == 'c'); - unsigned pattern = instr->GetImmSVEPredicateConstraint(); - switch (pattern) { - // VL1-VL8 are encoded directly. - case SVE_VL1: - case SVE_VL2: - case SVE_VL3: - case SVE_VL4: - case SVE_VL5: - case SVE_VL6: - case SVE_VL7: - case SVE_VL8: - AppendToOutput("vl%u", pattern); - break; - // VL16-VL256 are encoded as log2(N) + c. 
- case SVE_VL16: - case SVE_VL32: - case SVE_VL64: - case SVE_VL128: - case SVE_VL256: - AppendToOutput("vl%u", 16 << (pattern - SVE_VL16)); - break; - // Special cases. - case SVE_POW2: - AppendToOutput("pow2"); - break; - case SVE_MUL4: - AppendToOutput("mul4"); - break; - case SVE_MUL3: - AppendToOutput("mul3"); - break; - case SVE_ALL: - AppendToOutput("all"); - break; - default: - AppendToOutput("#0x%x", pattern); - break; - } - return 3; - } default: { VIXL_UNIMPLEMENTED(); return 0; @@ -10461,11 +5736,11 @@ int Disassembler::SubstituteShiftField(const Instruction *instr, VIXL_ASSERT(instr->GetShiftDP() <= 0x3); switch (format[1]) { - case 'D': { // NDP. + case 'D': { // HDP. VIXL_ASSERT(instr->GetShiftDP() != ROR); VIXL_FALLTHROUGH(); } - case 'L': { // NLo. + case 'L': { // HLo. if (instr->GetImmDPShift() != 0) { const char *shift_type[] = {"lsl", "lsr", "asr", "ror"}; AppendToOutput(", %s #%" PRId32, @@ -10474,14 +5749,6 @@ int Disassembler::SubstituteShiftField(const Instruction *instr, } return 3; } - case 'S': { // NSveS (SVE structured load/store indexing shift). - VIXL_ASSERT(strncmp(format, "NSveS", 5) == 0); - int msz = instr->ExtractBits(24, 23); - if (msz > 0) { - AppendToOutput(", lsl #%d", msz); - } - return 5; - } default: VIXL_UNIMPLEMENTED(); return 0; @@ -10652,43 +5919,30 @@ int Disassembler::SubstituteLSRegOffsetField(const Instruction *instr, int Disassembler::SubstitutePrefetchField(const Instruction *instr, const char *format) { - VIXL_ASSERT(format[0] == 'p'); + VIXL_ASSERT(format[0] == 'P'); USE(format); - bool is_sve = - (strncmp(format, "prefSVEOp", strlen("prefSVEOp")) == 0) ? true : false; - int placeholder_length = is_sve ? 
9 : 6; + static const char *hints[] = {"ld", "li", "st"}; static const char *stream_options[] = {"keep", "strm"}; - auto get_hints = [](bool is_sve) -> std::vector<std::string> { - static const std::vector<std::string> sve_hints = {"ld", "st"}; - static const std::vector<std::string> core_hints = {"ld", "li", "st"}; - return (is_sve) ? sve_hints : core_hints; - }; - - std::vector<std::string> hints = get_hints(is_sve); - unsigned hint = - is_sve ? instr->GetSVEPrefetchHint() : instr->GetPrefetchHint(); + unsigned hint = instr->GetPrefetchHint(); unsigned target = instr->GetPrefetchTarget() + 1; unsigned stream = instr->GetPrefetchStream(); - if ((hint >= hints.size()) || (target > 3)) { + if ((hint >= ArrayLength(hints)) || (target > 3)) { // Unallocated prefetch operations. - if (is_sve) { - std::bitset<4> prefetch_mode(instr->GetSVEImmPrefetchOperation()); - AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); - } else { - std::bitset<5> prefetch_mode(instr->GetImmPrefetchOperation()); - AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); - } + int prefetch_mode = instr->GetImmPrefetchOperation(); + AppendToOutput("#0b%c%c%c%c%c", + (prefetch_mode & (1 << 4)) ? '1' : '0', + (prefetch_mode & (1 << 3)) ? '1' : '0', + (prefetch_mode & (1 << 2)) ? '1' : '0', + (prefetch_mode & (1 << 1)) ? '1' : '0', + (prefetch_mode & (1 << 0)) ? 
'1' : '0'); } else { VIXL_ASSERT(stream < ArrayLength(stream_options)); - AppendToOutput("p%sl%d%s", - hints[hint].c_str(), - target, - stream_options[stream]); + AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]); } - return placeholder_length; + return 6; } int Disassembler::SubstituteBarrierField(const Instruction *instr, @@ -10743,159 +5997,6 @@ int Disassembler::SubstituteCrField(const Instruction *instr, return 2; } -int Disassembler::SubstituteIntField(const Instruction *instr, - const char *format) { - VIXL_ASSERT((format[0] == 'u') || (format[0] == 's')); - - // A generic signed or unsigned int field uses a placeholder of the form - // 'sAABB and 'uAABB respectively where AA and BB are two digit bit positions - // between 00 and 31, and AA >= BB. The placeholder is substituted with the - // decimal integer represented by the bits in the instruction between - // positions AA and BB inclusive. - // - // In addition, split fields can be represented using 'sAABB:CCDD, where CCDD - // become the least-significant bits of the result, and bit AA is the sign bit - // (if 's is used). - int32_t bits = 0; - int width = 0; - const char *c = format; - do { - c++; // Skip the 'u', 's' or ':'. - VIXL_ASSERT(strspn(c, "0123456789") == 4); - int msb = ((c[0] - '0') * 10) + (c[1] - '0'); - int lsb = ((c[2] - '0') * 10) + (c[3] - '0'); - c += 4; // Skip the characters we just read. - int chunk_width = msb - lsb + 1; - VIXL_ASSERT((chunk_width > 0) && (chunk_width < 32)); - bits = (bits << chunk_width) | (instr->ExtractBits(msb, lsb)); - width += chunk_width; - } while (*c == ':'); - VIXL_ASSERT(IsUintN(width, bits)); - - if (format[0] == 's') { - bits = ExtractSignedBitfield32(width - 1, 0, bits); - } - - if (*c == '+') { - // A "+n" trailing the format specifier indicates the extracted value should - // be incremented by n. This is for cases where the encoding is zero-based, - // but range of values is not, eg. 
values [1, 16] encoded as [0, 15] - char *new_c; - uint64_t value = strtoul(c + 1, &new_c, 10); - c = new_c; - VIXL_ASSERT(IsInt32(value)); - bits += value; - } else if (*c == '*') { - // Similarly, a "*n" trailing the format specifier indicates the extracted - // value should be multiplied by n. This is for cases where the encoded - // immediate is scaled, for example by access size. - char *new_c; - uint64_t value = strtoul(c + 1, &new_c, 10); - c = new_c; - VIXL_ASSERT(IsInt32(value)); - bits *= value; - } - - AppendToOutput("%d", bits); - - return static_cast<int>(c - format); -} - -int Disassembler::SubstituteSVESize(const Instruction *instr, - const char *format) { - USE(format); - VIXL_ASSERT(format[0] == 't'); - - static const char sizes[] = {'b', 'h', 's', 'd', 'q'}; - // TODO: only the most common case for <size> is supported at the moment, - // and even then, the RESERVED values are handled as if they're not - // reserved. - unsigned size_in_bytes_log2 = instr->GetSVESize(); - int placeholder_length = 1; - switch (format[1]) { - case 'l': - placeholder_length++; - if (format[2] == 's') { - // 'tls: Loads and stores - size_in_bytes_log2 = instr->ExtractBits(22, 21); - placeholder_length++; - if (format[3] == 's') { - // Sign extension load. - unsigned msize = instr->ExtractBits(24, 23); - if (msize > size_in_bytes_log2) size_in_bytes_log2 ^= 0x3; - placeholder_length++; - } - } else { - // 'tl: Logical operations - size_in_bytes_log2 = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); - } - break; - case 'm': // 'tmsz - VIXL_ASSERT(strncmp(format, "tmsz", 4) == 0); - placeholder_length += 3; - size_in_bytes_log2 = instr->ExtractBits(24, 23); - break; - case 's': - if (format[2] == 'z') { - VIXL_ASSERT((format[3] == 'x') || (format[3] == 's') || - (format[3] == 'p')); - if (format[3] == 'x') { - // 'tszx: Indexes. 
- std::pair<int, int> index_and_lane_size = - instr->GetSVEPermuteIndexAndLaneSizeLog2(); - size_in_bytes_log2 = index_and_lane_size.second; - } else if (format[3] == 'p') { - // 'tszp: Predicated shifts. - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true); - size_in_bytes_log2 = shift_and_lane_size.second; - } else { - // 'tszs: Unpredicated shifts. - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); - size_in_bytes_log2 = shift_and_lane_size.second; - } - placeholder_length += 3; // skip `sz[x|s]` - } - break; - case 'h': - // Half size of the lane size field. - size_in_bytes_log2 -= 1; - placeholder_length++; - break; - case 'q': - // Quarter size of the lane size field. - size_in_bytes_log2 -= 2; - placeholder_length++; - break; - default: - break; - } - - VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(sizes)); - AppendToOutput("%c", sizes[size_in_bytes_log2]); - - return placeholder_length; -} - -int Disassembler::SubstituteTernary(const Instruction *instr, - const char *format) { - VIXL_ASSERT((format[0] == '?') && (format[3] == ':')); - - // The ternary substitution of the format "'?bb:TF" is replaced by a single - // character, either T or F, depending on the value of the bit at position - // bb in the instruction. For example, "'?31:xw" is substituted with "x" if - // bit 31 is true, and "w" otherwise. 
- VIXL_ASSERT(strspn(&format[1], "0123456789") == 2); - char *c; - uint64_t value = strtoul(&format[1], &c, 10); - VIXL_ASSERT(value < (kInstructionSize * kBitsPerByte)); - VIXL_ASSERT((*c == ':') && (strlen(c) >= 3)); // Minimum of ":TF" - c++; - AppendToOutput("%c", c[1 - instr->ExtractBit(static_cast<int>(value))]); - return 6; -} - void Disassembler::ResetOutput() { buffer_pos_ = 0; buffer_[buffer_pos_] = 0; diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h index b59840aa..c650bee9 100644 --- a/src/aarch64/disasm-aarch64.h +++ b/src/aarch64/disasm-aarch64.h @@ -27,8 +27,6 @@ #ifndef VIXL_AARCH64_DISASM_AARCH64_H #define VIXL_AARCH64_DISASM_AARCH64_H -#include <utility> - #include "../globals-vixl.h" #include "../utils-vixl.h" @@ -114,13 +112,10 @@ class Disassembler : public DecoderVisitor { private: void Format(const Instruction* instr, const char* mnemonic, - const char* format0, - const char* format1 = NULL); + const char* format); void Substitute(const Instruction* instr, const char* string); int SubstituteField(const Instruction* instr, const char* format); int SubstituteRegisterField(const Instruction* instr, const char* format); - int SubstitutePredicateRegisterField(const Instruction* instr, - const char* format); int SubstituteImmediateField(const Instruction* instr, const char* format); int SubstituteLiteralField(const Instruction* instr, const char* format); int SubstituteBitfieldImmediateField(const Instruction* instr, @@ -135,14 +130,6 @@ class Disassembler : public DecoderVisitor { int SubstituteBarrierField(const Instruction* instr, const char* format); int SubstituteSysOpField(const Instruction* instr, const char* format); int SubstituteCrField(const Instruction* instr, const char* format); - int SubstituteIntField(const Instruction* instr, const char* format); - int SubstituteSVESize(const Instruction* instr, const char* format); - int SubstituteTernary(const Instruction* instr, const char* format); - - 
std::pair<unsigned, unsigned> GetRegNumForField(const Instruction* instr, - char reg_prefix, - const char* field); - bool RdIsZROrSP(const Instruction* instr) const { return (instr->GetRd() == kZeroRegCode); } diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc index b3e28384..a99a0459 100644 --- a/src/aarch64/instructions-aarch64.cc +++ b/src/aarch64/instructions-aarch64.cc @@ -35,8 +35,7 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, unsigned width) { VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) || (width == 32)); - VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) || - (reg_size == kSRegSize) || (reg_size == kDRegSize)); + VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); uint64_t result = value & ((UINT64_C(1) << width) - 1); for (unsigned i = width; i < reg_size; i *= 2) { result |= (result << i); @@ -44,503 +43,6 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, return result; } -bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const { - bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z; - bool movprfx_is_unpredicated = - movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z; - VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated); - - int movprfx_zd = movprfx->GetRd(); - int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1; - VectorFormat movprfx_vform = - movprfx_is_predicated ? 
movprfx->GetSVEVectorFormat() : kFormatUndefined; - - bool pg_matches_low8 = movprfx_pg == GetPgLow8(); - bool vform_matches = movprfx_vform == GetSVEVectorFormat(); - bool zd_matches = movprfx_zd == GetRd(); - bool zd_matches_zm = movprfx_zd == GetRm(); - bool zd_matches_zn = movprfx_zd == GetRn(); - - switch (Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { - case AND_z_zi: - case EOR_z_zi: - case ORR_z_zi: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEBitwiseLogical_PredicatedMask)) { - case AND_z_p_zz: - case BIC_z_p_zz: - case EOR_z_p_zz: - case ORR_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEBitwiseShiftByImm_PredicatedMask)) { - case ASRD_z_p_zi: - case ASR_z_p_zi: - case LSL_z_p_zi: - case LSR_z_p_zi: - if (movprfx_is_predicated) { - if (!pg_matches_low8) return false; - unsigned tsz = ExtractBits<0x00c00300>(); - VectorFormat instr_vform = - SVEFormatFromLaneSizeInBytesLog2(HighestSetBitPosition(tsz)); - if (movprfx_vform != instr_vform) return false; - } - return zd_matches; - } - switch (Mask(SVEBitwiseShiftByVector_PredicatedMask)) { - case ASRR_z_p_zz: - case ASR_z_p_zz: - case LSLR_z_p_zz: - case LSL_z_p_zz: - case LSRR_z_p_zz: - case LSR_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { - case ASR_z_p_zw: - case LSL_z_p_zw: - case LSR_z_p_zw: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEConditionallyBroadcastElementToVectorMask)) { - case CLASTA_z_p_zz: - case CLASTB_z_p_zz: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVECopyFPImm_PredicatedMask)) { - case FCPY_z_p_i: - if (movprfx_is_predicated) { - if (!vform_matches) return false; - if (movprfx_pg != GetRx<19, 16>()) return false; - } - return zd_matches; - } - switch (Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { - case CPY_z_p_r: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches; - } - switch (Mask(SVECopyIntImm_PredicatedMask)) { - case CPY_z_p_i: - if (movprfx_is_predicated) { - if (!vform_matches) return false; - if (movprfx_pg != GetRx<19, 16>()) return false; - } - // Only the merging form can take movprfx. - if (ExtractBit(14) == 0) return false; - return zd_matches; - } - switch (Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { - case CPY_z_p_v: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPArithmeticWithImm_PredicatedMask)) { - case FADD_z_p_zs: - case FMAXNM_z_p_zs: - case FMAX_z_p_zs: - case FMINNM_z_p_zs: - case FMIN_z_p_zs: - case FMUL_z_p_zs: - case FSUBR_z_p_zs: - case FSUB_z_p_zs: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches; - } - switch (Mask(SVEFPArithmetic_PredicatedMask)) { - case FABD_z_p_zz: - case FADD_z_p_zz: - case FDIVR_z_p_zz: - case FDIV_z_p_zz: - case FMAXNM_z_p_zz: - case FMAX_z_p_zz: - case FMINNM_z_p_zz: - case FMIN_z_p_zz: - case FMULX_z_p_zz: - case FMUL_z_p_zz: - case FSCALE_z_p_zz: - case FSUBR_z_p_zz: - case FSUB_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - 
return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEFPComplexAdditionMask)) { - case FCADD_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEFPComplexMulAddIndexMask)) { - case FCMLA_z_zzzi_h: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<18, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - case FCMLA_z_zzzi_s: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<19, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPComplexMulAddMask)) { - case FCMLA_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zm && !zd_matches_zn; - } - switch (Mask(SVEFPConvertPrecisionMask)) { - case FCVT_z_p_z_d2h: - case FCVT_z_p_z_d2s: - case FCVT_z_p_z_h2d: - case FCVT_z_p_z_h2s: - case FCVT_z_p_z_s2d: - case FCVT_z_p_z_s2h: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPConvertToIntMask)) { - case FCVTZS_z_p_z_d2w: - case FCVTZS_z_p_z_d2x: - case FCVTZS_z_p_z_fp162h: - case FCVTZS_z_p_z_fp162w: - case FCVTZS_z_p_z_fp162x: - case FCVTZS_z_p_z_s2w: - case FCVTZS_z_p_z_s2x: - case FCVTZU_z_p_z_d2w: - case FCVTZU_z_p_z_d2x: - case FCVTZU_z_p_z_fp162h: - case FCVTZU_z_p_z_fp162w: - case FCVTZU_z_p_z_fp162x: - case FCVTZU_z_p_z_s2w: - case FCVTZU_z_p_z_s2x: - if (movprfx_is_predicated) { - if (!pg_matches_low8) return false; - // The movprfx element size must match the instruction's maximum encoded - // element size. 
We have to partially decode the opc and opc2 fields to - // find this. - unsigned opc = ExtractBits(23, 22); - unsigned opc2 = ExtractBits(18, 17); - VectorFormat instr_vform = - SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); - if (movprfx_vform != instr_vform) return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPMulAddIndexMask)) { - case FMLA_z_zzzi_h: - case FMLA_z_zzzi_h_i3h: - case FMLA_z_zzzi_s: - case FMLS_z_zzzi_h: - case FMLS_z_zzzi_h_i3h: - case FMLS_z_zzzi_s: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<18, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - case FMLA_z_zzzi_d: - case FMLS_z_zzzi_d: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<19, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPMulAddMask)) { - case FMAD_z_p_zzz: - case FMSB_z_p_zzz: - case FNMAD_z_p_zzz: - case FNMSB_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<20, 16>()) return false; - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - case FMLA_z_p_zzz: - case FMLS_z_p_zzz: - case FNMLA_z_p_zzz: - case FNMLS_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zm && !zd_matches_zn; - } - switch (Mask(SVEFPRoundToIntegralValueMask)) { - case FRINTA_z_p_z: - case FRINTI_z_p_z: - case FRINTM_z_p_z: - case FRINTN_z_p_z: - case FRINTP_z_p_z: - case FRINTX_z_p_z: - case FRINTZ_z_p_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEFPTrigMulAddCoefficientMask)) { - case FTMAD_z_zzi: - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEFPUnaryOpMask)) { - case FRECPX_z_p_z: - case FSQRT_z_p_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEIncDecByPredicateCountMask)) { - case DECP_z_p_z: - case INCP_z_p_z: - case SQDECP_z_p_z: - case SQINCP_z_p_z: - case UQDECP_z_p_z: - case UQINCP_z_p_z: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIncDecVectorByElementCountMask)) { - case DECD_z_zs: - case DECH_z_zs: - case DECW_z_zs: - case INCD_z_zs: - case INCH_z_zs: - case INCW_z_zs: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEInsertGeneralRegisterMask)) { - case INSR_z_r: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEInsertSIMDFPScalarRegisterMask)) { - case INSR_z_v: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { - case ADD_z_zi: - case SQADD_z_zi: - case SQSUB_z_zi: - case SUBR_z_zi: - case SUB_z_zi: - case UQADD_z_zi: - case UQSUB_z_zi: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntAddSubtractVectors_PredicatedMask)) { - case ADD_z_p_zz: - case SUBR_z_p_zz: - case SUB_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntConvertToFPMask)) { - case SCVTF_z_p_z_h2fp16: - case SCVTF_z_p_z_w2d: - case SCVTF_z_p_z_w2fp16: - case SCVTF_z_p_z_w2s: - case SCVTF_z_p_z_x2d: - case SCVTF_z_p_z_x2fp16: - case SCVTF_z_p_z_x2s: - case UCVTF_z_p_z_h2fp16: - case UCVTF_z_p_z_w2d: - case UCVTF_z_p_z_w2fp16: - case UCVTF_z_p_z_w2s: - case UCVTF_z_p_z_x2d: - case UCVTF_z_p_z_x2fp16: - case UCVTF_z_p_z_x2s: - if (movprfx_is_predicated) { - if (!pg_matches_low8) return false; - // The movprfx element size must match the instruction's maximum encoded - // element size. We have to partially decode the opc and opc2 fields to - // find this. - unsigned opc = ExtractBits(23, 22); - unsigned opc2 = ExtractBits(18, 17); - VectorFormat instr_vform = - SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); - if (movprfx_vform != instr_vform) return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEIntDivideVectors_PredicatedMask)) { - case SDIVR_z_p_zz: - case SDIV_z_p_zz: - case UDIVR_z_p_zz: - case UDIV_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntMinMaxDifference_PredicatedMask)) { - case SABD_z_p_zz: - case SMAX_z_p_zz: - case SMIN_z_p_zz: - case UABD_z_p_zz: - case UMAX_z_p_zz: - case UMIN_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntMinMaxImm_UnpredicatedMask)) { - case SMAX_z_zi: - case SMIN_z_zi: - case UMAX_z_zi: - case UMIN_z_zi: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntMulAddPredicatedMask)) { - case MAD_z_p_zzz: - case MSB_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches && !zd_matches_zm; - case MLA_z_p_zzz: - case MLS_z_p_zzz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zm && !zd_matches_zn; - } - switch (Mask(SVEIntMulAddUnpredicatedMask)) { - case SDOT_z_zzz: - case UDOT_z_zzz: - return movprfx_is_unpredicated && zd_matches && !zd_matches_zm && - !zd_matches_zn; - } - switch (Mask(SVEIntMulImm_UnpredicatedMask)) { - case MUL_z_zi: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEIntMulVectors_PredicatedMask)) { - case MUL_z_p_zz: - case SMULH_z_p_zz: - case UMULH_z_p_zz: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return zd_matches; - } - switch (Mask(SVEIntUnaryArithmeticPredicatedMask)) { - case ABS_z_p_z: - case CLS_z_p_z: - case CLZ_z_p_z: - case CNOT_z_p_z: - case CNT_z_p_z: - case FABS_z_p_z: - case FNEG_z_p_z: - case NEG_z_p_z: - case NOT_z_p_z: - case SXTB_z_p_z: - case SXTH_z_p_z: - case SXTW_z_p_z: - case UXTB_z_p_z: - case UXTH_z_p_z: - case UXTW_z_p_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVEMulIndexMask)) { - case SDOT_z_zzzi_s: - case UDOT_z_zzzi_s: - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<18, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - case SDOT_z_zzzi_d: - case UDOT_z_zzzi_d: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<19, 16>()) return false; - return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; - } - switch (Mask(SVEPermuteVectorExtractMask)) { - case EXT_z_zi_des: - // The movprfx's `zd` must not alias any other inputs. - if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEReverseWithinElementsMask)) { - case RBIT_z_p_z: - case REVB_z_z: - case REVH_z_z: - case REVW_z_z: - if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { - return false; - } - return zd_matches && !zd_matches_zn; - } - switch (Mask(SVESaturatingIncDecVectorByElementCountMask)) { - case SQDECD_z_zs: - case SQDECH_z_zs: - case SQDECW_z_zs: - case SQINCD_z_zs: - case SQINCH_z_zs: - case SQINCW_z_zs: - case UQDECD_z_zs: - case UQDECH_z_zs: - case UQDECW_z_zs: - case UQINCD_z_zs: - case UQINCH_z_zs: - case UQINCW_z_zs: - return movprfx_is_unpredicated && zd_matches; - } - switch (Mask(SVEVectorSplice_DestructiveMask)) { - case SPLICE_z_p_zz_des: - // The movprfx's `zd` must not alias any other inputs. 
- if (movprfx_zd == GetRx<9, 5>()) return false; - return movprfx_is_unpredicated && zd_matches; - } - return false; -} // NOLINT(readability/fn_size) bool Instruction::IsLoad() const { if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { @@ -601,16 +103,6 @@ bool Instruction::IsStore() const { } -std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const { - uint32_t imm_2 = ExtractBits<0x00C00000>(); - uint32_t tsz_5 = ExtractBits<0x001F0000>(); - uint32_t imm_7 = (imm_2 << 5) | tsz_5; - int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5); - int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7); - return std::make_pair(index, lane_size_in_byte_log_2); -} - - // Logical immediates can't encode zero, so a return value of zero is used to // indicate a failure case. Specifically, where the constraints on imm_s are // not met. @@ -619,108 +111,7 @@ uint64_t Instruction::GetImmLogical() const { int32_t n = GetBitN(); int32_t imm_s = GetImmSetBits(); int32_t imm_r = GetImmRotate(); - return DecodeImmBitMask(n, imm_s, imm_r, reg_size); -} - -// Logical immediates can't encode zero, so a return value of zero is used to -// indicate a failure case. Specifically, where the constraints on imm_s are -// not met. -uint64_t Instruction::GetSVEImmLogical() const { - int n = GetSVEBitN(); - int imm_s = GetSVEImmSetBits(); - int imm_r = GetSVEImmRotate(); - int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2(); - switch (lane_size_in_bytes_log2) { - case kDRegSizeInBytesLog2: - case kSRegSizeInBytesLog2: - case kHRegSizeInBytesLog2: - case kBRegSizeInBytesLog2: { - int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3); - return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits); - } - default: - return 0; - } -} -std::pair<int, int> Instruction::GetSVEImmShiftAndLaneSizeLog2( - bool is_predicated) const { - Instr tsize = - is_predicated ? 
ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>(); - Instr imm_3 = - is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>(); - if (tsize == 0) { - // The bit field `tsize` means undefined if it is zero, so return a - // convenience value kWMinInt to indicate a failure case. - return std::make_pair(kWMinInt, kWMinInt); - } - - int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1; - int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte; - int shift = (2 * esize) - ((tsize << 3) | imm_3); - return std::make_pair(shift, lane_size_in_bytes_log_2); -} - -int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const { - Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb); - if (is_signed) { - dtype_h = dtype_h ^ 0x3; - } - return dtype_h; -} - -int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const { - Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb); - if (is_signed) { - dtype_l = dtype_l ^ 0x3; - } - return dtype_l; -} - -int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const { - int n = GetSVEBitN(); - int imm_s = GetSVEImmSetBits(); - unsigned type_bitset = - (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width)); - - // An lane size is constructed from the n and imm_s bits according to - // the following table: - // - // N imms size - // 0 0xxxxx 32 - // 0 10xxxx 16 - // 0 110xxx 8 - // 0 1110xx 8 - // 0 11110x 8 - // 1 xxxxxx 64 - - if (type_bitset == 0) { - // Bail out early since `HighestSetBitPosition` doesn't accept zero - // value input. - return -1; - } - - switch (HighestSetBitPosition(type_bitset)) { - case 6: - return kDRegSizeInBytesLog2; - case 5: - return kSRegSizeInBytesLog2; - case 4: - return kHRegSizeInBytesLog2; - case 3: - case 2: - case 1: - return kBRegSizeInBytesLog2; - default: - // RESERVED encoding. 
- return -1; - } -} - -uint64_t Instruction::DecodeImmBitMask(int32_t n, - int32_t imm_s, - int32_t imm_r, - int32_t size) const { // An integer is constructed from the n, imm_s and imm_r bits according to // the following table: // @@ -755,7 +146,7 @@ uint64_t Instruction::DecodeImmBitMask(int32_t n, return 0; } uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1; - return RepeatBitsAcrossReg(size, + return RepeatBitsAcrossReg(reg_size, RotateRight(bits, imm_r & mask, width), width); } @@ -1006,6 +397,8 @@ void Instruction::SetImmLLiteral(const Instruction* source) { VectorFormat VectorFormatHalfWidth(VectorFormat vform) { + VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D || + vform == kFormatH || vform == kFormatS || vform == kFormatD); switch (vform) { case kFormat8H: return kFormat8B; @@ -1019,13 +412,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) { return kFormatH; case kFormatD: return kFormatS; - case kFormatVnH: - return kFormatVnB; - case kFormatVnS: - return kFormatVnH; - case kFormatVnD: - return kFormatVnS; - break; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -1094,12 +480,6 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) { return kFormat2S; case kFormat2D: return kFormat4S; - case kFormatVnH: - return kFormatVnB; - case kFormatVnS: - return kFormatVnH; - case kFormatVnD: - return kFormatVnS; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -1138,8 +518,8 @@ VectorFormat VectorFormatHalfLanes(VectorFormat vform) { } -VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) { - switch (lane_size_in_bits) { +VectorFormat ScalarFormatFromLaneSize(int laneSize) { + switch (laneSize) { case 8: return kFormatB; case 16: @@ -1155,69 +535,6 @@ VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) { } -bool IsSVEFormat(VectorFormat vform) { - switch (vform) { - case kFormatVnB: - case kFormatVnH: - case kFormatVnS: - case kFormatVnD: - case kFormatVnQ: - return 
true; - default: - return false; - } -} - - -VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) { - switch (lane_size_in_bytes) { - case 1: - return kFormatVnB; - case 2: - return kFormatVnH; - case 4: - return kFormatVnS; - case 8: - return kFormatVnD; - case 16: - return kFormatVnQ; - default: - VIXL_UNREACHABLE(); - return kFormatUndefined; - } -} - - -VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) { - switch (lane_size_in_bits) { - case 8: - case 16: - case 32: - case 64: - case 128: - return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte); - default: - VIXL_UNREACHABLE(); - return kFormatUndefined; - } -} - - -VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) { - switch (lane_size_in_bytes_log2) { - case 0: - case 1: - case 2: - case 3: - case 4: - return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2); - default: - VIXL_UNREACHABLE(); - return kFormatUndefined; - } -} - - VectorFormat ScalarFormatFromFormat(VectorFormat vform) { return ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); } @@ -1225,7 +542,6 @@ VectorFormat ScalarFormatFromFormat(VectorFormat vform) { unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { VIXL_ASSERT(vform != kFormatUndefined); - VIXL_ASSERT(!IsSVEFormat(vform)); switch (vform) { case kFormatB: return kBRegSize; @@ -1235,19 +551,14 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { case kFormat2H: return kSRegSize; case kFormatD: + return kDRegSize; case kFormat8B: case kFormat4H: case kFormat2S: case kFormat1D: return kDRegSize; - case kFormat16B: - case kFormat8H: - case kFormat4S: - case kFormat2D: - return kQRegSize; default: - VIXL_UNREACHABLE(); - return 0; + return kQRegSize; } } @@ -1263,26 +574,20 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) { case kFormatB: case kFormat8B: case kFormat16B: - case kFormatVnB: return 8; case kFormatH: case kFormat2H: case kFormat4H: case kFormat8H: - case kFormatVnH: 
return 16; case kFormatS: case kFormat2S: case kFormat4S: - case kFormatVnS: return 32; case kFormatD: case kFormat1D: case kFormat2D: - case kFormatVnD: return 64; - case kFormatVnQ: - return 128; default: VIXL_UNREACHABLE(); return 0; @@ -1301,26 +606,20 @@ int LaneSizeInBytesLog2FromFormat(VectorFormat vform) { case kFormatB: case kFormat8B: case kFormat16B: - case kFormatVnB: return 0; case kFormatH: case kFormat2H: case kFormat4H: case kFormat8H: - case kFormatVnH: return 1; case kFormatS: case kFormat2S: case kFormat4S: - case kFormatVnS: return 2; case kFormatD: case kFormat1D: case kFormat2D: - case kFormatVnD: return 3; - case kFormatVnQ: - return 4; default: VIXL_UNREACHABLE(); return 0; @@ -1398,19 +697,17 @@ bool IsVectorFormat(VectorFormat vform) { int64_t MaxIntFromFormat(VectorFormat vform) { - int lane_size = LaneSizeInBitsFromFormat(vform); - return static_cast<int64_t>(GetUintMask(lane_size) >> 1); + return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform)); } int64_t MinIntFromFormat(VectorFormat vform) { - return -MaxIntFromFormat(vform) - 1; + return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform)); } uint64_t MaxUintFromFormat(VectorFormat vform) { - return GetUintMask(LaneSizeInBitsFromFormat(vform)); + return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform)); } - } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h index 5f56ae16..6d4f96b4 100644 --- a/src/aarch64/instructions-aarch64.h +++ b/src/aarch64/instructions-aarch64.h @@ -81,7 +81,6 @@ const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff); const uint64_t kHRegMask = UINT64_C(0xffff); const uint64_t kSRegMask = UINT64_C(0xffffffff); const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff); -const uint64_t kHSignMask = UINT64_C(0x8000); const uint64_t kSSignMask = UINT64_C(0x80000000); const uint64_t kDSignMask = UINT64_C(0x8000000000000000); const uint64_t kWSignMask = UINT64_C(0x80000000); @@ -117,30 
+116,6 @@ VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000)); const uint64_t kTTBRMask = UINT64_C(1) << 55; -// We can't define a static kZRegSize because the size depends on the -// implementation. However, it is sometimes useful to know the minimum and -// maxmimum possible sizes. -const unsigned kZRegMinSize = 128; -const unsigned kZRegMinSizeLog2 = 7; -const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8; -const unsigned kZRegMinSizeInBytesLog2 = kZRegMinSizeLog2 - 3; -const unsigned kZRegMaxSize = 2048; -const unsigned kZRegMaxSizeLog2 = 11; -const unsigned kZRegMaxSizeInBytes = kZRegMaxSize / 8; -const unsigned kZRegMaxSizeInBytesLog2 = kZRegMaxSizeLog2 - 3; - -// The P register size depends on the Z register size. -const unsigned kZRegBitsPerPRegBit = kBitsPerByte; -const unsigned kZRegBitsPerPRegBitLog2 = 3; -const unsigned kPRegMinSize = kZRegMinSize / kZRegBitsPerPRegBit; -const unsigned kPRegMinSizeLog2 = kZRegMinSizeLog2 - 3; -const unsigned kPRegMinSizeInBytes = kPRegMinSize / 8; -const unsigned kPRegMinSizeInBytesLog2 = kPRegMinSizeLog2 - 3; -const unsigned kPRegMaxSize = kZRegMaxSize / kZRegBitsPerPRegBit; -const unsigned kPRegMaxSizeLog2 = kZRegMaxSizeLog2 - 3; -const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8; -const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3; - // Make these moved float constants backwards compatible // with explicit vixl::aarch64:: namespace references. using vixl::kDoubleMantissaBits; @@ -176,44 +151,6 @@ enum AddrMode { Offset, PreIndex, PostIndex }; enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister }; -enum VectorFormat { - kFormatUndefined = 0xffffffff, - kFormat8B = NEON_8B, - kFormat16B = NEON_16B, - kFormat4H = NEON_4H, - kFormat8H = NEON_8H, - kFormat2S = NEON_2S, - kFormat4S = NEON_4S, - kFormat1D = NEON_1D, - kFormat2D = NEON_2D, - - // Scalar formats. 
We add the scalar bit to distinguish between scalar and - // vector enumerations; the bit is always set in the encoding of scalar ops - // and always clear for vector ops. Although kFormatD and kFormat1D appear - // to be the same, their meaning is subtly different. The first is a scalar - // operation, the second a vector operation that only affects one lane. - kFormatB = NEON_B | NEONScalar, - kFormatH = NEON_H | NEONScalar, - kFormatS = NEON_S | NEONScalar, - kFormatD = NEON_D | NEONScalar, - - // An artificial value, used to distinguish from NEON format category. - kFormatSVE = 0x0000fffd, - // An artificial value. Q lane size isn't encoded in the usual size field. - kFormatSVEQ = 0x000f0000, - // Vector element width of SVE register with the unknown lane count since - // the vector length is implementation dependent. - kFormatVnB = SVE_B | kFormatSVE, - kFormatVnH = SVE_H | kFormatSVE, - kFormatVnS = SVE_S | kFormatSVE, - kFormatVnD = SVE_D | kFormatSVE, - kFormatVnQ = kFormatSVEQ | kFormatSVE, - - // An artificial value, used by simulator trace tests and a few oddball - // instructions (such as FMLAL). - kFormat2H = 0xfffffffe -}; - // Instructions. --------------------------------------------------------------- class Instruction { @@ -292,29 +229,6 @@ class Instruction { INSTRUCTION_FIELDS_LIST(DEFINE_GETTER) #undef DEFINE_GETTER - template <int msb, int lsb> - int32_t GetRx() const { - // We don't have any register fields wider than five bits, so the result - // will always fit into an int32_t. 
- VIXL_ASSERT((msb - lsb + 1) <= 5); - return this->ExtractBits(msb, lsb); - } - - VectorFormat GetSVEVectorFormat() const { - switch (Mask(SVESizeFieldMask)) { - case SVE_B: - return kFormatVnB; - case SVE_H: - return kFormatVnH; - case SVE_S: - return kFormatVnS; - case SVE_D: - return kFormatVnD; - } - VIXL_UNREACHABLE(); - return kFormatUndefined; - } - // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST), // formed from ImmPCRelLo and ImmPCRelHi. int GetImmPCRel() const { @@ -340,20 +254,6 @@ class Instruction { VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) { return GetImmLogical(); } - uint64_t GetSVEImmLogical() const; - int GetSVEBitwiseImmLaneSizeInBytesLog2() const; - uint64_t DecodeImmBitMask(int32_t n, - int32_t imm_s, - int32_t imm_r, - int32_t size) const; - - std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const; - - std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const; - - int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const; - - int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const; unsigned GetImmNEONabcdefgh() const; VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) { @@ -380,16 +280,6 @@ class Instruction { return GetImmNEONFP64(); } - Float16 GetSVEImmFP16() const { return Imm8ToFloat16(ExtractBits(12, 5)); } - - float GetSVEImmFP32() const { return Imm8ToFP32(ExtractBits(12, 5)); } - - double GetSVEImmFP64() const { return Imm8ToFP64(ExtractBits(12, 5)); } - - static Float16 Imm8ToFloat16(uint32_t imm8); - static float Imm8ToFP32(uint32_t imm8); - static double Imm8ToFP64(uint32_t imm8); - unsigned GetSizeLS() const { return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask))); } @@ -452,9 +342,6 @@ class Instruction { return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed; } - // True if `this` is valid immediately after the provided movprfx instruction. 
- bool CanTakeSVEMovprfx(Instruction const* movprfx) const; - bool IsLoad() const; bool IsStore() const; @@ -670,12 +557,41 @@ class Instruction { private: int GetImmBranch() const; + static Float16 Imm8ToFloat16(uint32_t imm8); + static float Imm8ToFP32(uint32_t imm8); + static double Imm8ToFP64(uint32_t imm8); + void SetPCRelImmTarget(const Instruction* target); void SetBranchImmTarget(const Instruction* target); }; -// Functions for handling NEON and SVE vector format information. +// Functions for handling NEON vector format information. +enum VectorFormat { + kFormatUndefined = 0xffffffff, + kFormat8B = NEON_8B, + kFormat16B = NEON_16B, + kFormat4H = NEON_4H, + kFormat8H = NEON_8H, + kFormat2S = NEON_2S, + kFormat4S = NEON_4S, + kFormat1D = NEON_1D, + kFormat2D = NEON_2D, + + // Scalar formats. We add the scalar bit to distinguish between scalar and + // vector enumerations; the bit is always set in the encoding of scalar ops + // and always clear for vector ops. Although kFormatD and kFormat1D appear + // to be the same, their meaning is subtly different. The first is a scalar + // operation, the second a vector operation that only affects one lane. + kFormatB = NEON_B | NEONScalar, + kFormatH = NEON_H | NEONScalar, + kFormatS = NEON_S | NEONScalar, + kFormatD = NEON_D | NEONScalar, + + // An artificial value, used by simulator trace tests and a few oddball + // instructions (such as FMLAL). 
+ kFormat2H = 0xfffffffe +}; const int kMaxLanesPerVector = 16; @@ -683,16 +599,12 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform); VectorFormat VectorFormatDoubleWidth(VectorFormat vform); VectorFormat VectorFormatDoubleLanes(VectorFormat vform); VectorFormat VectorFormatHalfLanes(VectorFormat vform); -VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits); +VectorFormat ScalarFormatFromLaneSize(int lanesize); VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform); VectorFormat VectorFormatFillQ(VectorFormat vform); VectorFormat ScalarFormatFromFormat(VectorFormat vform); -VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits); -VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes); -VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log_2); unsigned RegisterSizeInBitsFromFormat(VectorFormat vform); unsigned RegisterSizeInBytesFromFormat(VectorFormat vform); -bool IsSVEFormat(VectorFormat vform); // TODO: Make the return types of these functions consistent. unsigned LaneSizeInBitsFromFormat(VectorFormat vform); int LaneSizeInBytesFromFormat(VectorFormat vform); diff --git a/src/aarch64/instrument-aarch64.cc b/src/aarch64/instrument-aarch64.cc new file mode 100644 index 00000000..7cb6b20e --- /dev/null +++ b/src/aarch64/instrument-aarch64.cc @@ -0,0 +1,975 @@ +// Copyright 2014, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "instrument-aarch64.h" + +namespace vixl { +namespace aarch64 { + +Counter::Counter(const char* name, CounterType type) + : count_(0), enabled_(false), type_(type) { + VIXL_ASSERT(name != NULL); + strncpy(name_, name, kCounterNameMaxLength - 1); + // Make sure `name_` is always NULL-terminated, even if the source's length is + // higher. + name_[kCounterNameMaxLength - 1] = '\0'; +} + + +void Counter::Enable() { enabled_ = true; } + + +void Counter::Disable() { enabled_ = false; } + + +bool Counter::IsEnabled() { return enabled_; } + + +void Counter::Increment() { + if (enabled_) { + count_++; + } +} + + +uint64_t Counter::GetCount() { + uint64_t result = count_; + if (type_ == Gauge) { + // If the counter is a Gauge, reset the count after reading. 
+ count_ = 0; + } + return result; +} + + +const char* Counter::GetName() { return name_; } + + +CounterType Counter::GetType() { return type_; } + + +struct CounterDescriptor { + const char* name; + CounterType type; +}; + + +static const CounterDescriptor kCounterList[] = + {{"Instruction", Cumulative}, + + {"Move Immediate", Gauge}, + {"Add/Sub DP", Gauge}, + {"Logical DP", Gauge}, + {"Other Int DP", Gauge}, + {"FP DP", Gauge}, + + {"Conditional Select", Gauge}, + {"Conditional Compare", Gauge}, + + {"Unconditional Branch", Gauge}, + {"Compare and Branch", Gauge}, + {"Test and Branch", Gauge}, + {"Conditional Branch", Gauge}, + + {"Load Integer", Gauge}, + {"Load FP", Gauge}, + {"Load Pair", Gauge}, + {"Load Literal", Gauge}, + + {"Store Integer", Gauge}, + {"Store FP", Gauge}, + {"Store Pair", Gauge}, + + {"PC Addressing", Gauge}, + {"Other", Gauge}, + {"NEON", Gauge}, + {"Crypto", Gauge}}; + + +Instrument::Instrument(const char* datafile, uint64_t sample_period) + : output_stream_(stdout), sample_period_(sample_period) { + // Set up the output stream. If datafile is non-NULL, use that file. If it + // can't be opened, or datafile is NULL, use stdout. + if (datafile != NULL) { + output_stream_ = fopen(datafile, "w"); + if (output_stream_ == NULL) { + printf("Can't open output file %s. Using stdout.\n", datafile); + output_stream_ = stdout; + } + } + + static const int num_counters = + sizeof(kCounterList) / sizeof(CounterDescriptor); + + // Dump an instrumentation description comment at the top of the file. + fprintf(output_stream_, "# counters=%d\n", num_counters); + fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_); + + // Construct Counter objects from counter description array. 
+ for (int i = 0; i < num_counters; i++) { + Counter* counter = new Counter(kCounterList[i].name, kCounterList[i].type); + counters_.push_back(counter); + } + + DumpCounterNames(); +} + + +Instrument::~Instrument() { + // Dump any remaining instruction data to the output file. + DumpCounters(); + + // Free all the counter objects. + std::list<Counter*>::iterator it; + for (it = counters_.begin(); it != counters_.end(); it++) { + delete *it; + } + + if (output_stream_ != stdout) { + fclose(output_stream_); + } +} + + +void Instrument::Update() { + // Increment the instruction counter, and dump all counters if a sample period + // has elapsed. + static Counter* counter = GetCounter("Instruction"); + VIXL_ASSERT(counter->GetType() == Cumulative); + counter->Increment(); + + if ((sample_period_ != 0) && counter->IsEnabled() && + (counter->GetCount() % sample_period_) == 0) { + DumpCounters(); + } +} + + +void Instrument::DumpCounters() { + // Iterate through the counter objects, dumping their values to the output + // stream. + std::list<Counter*>::const_iterator it; + for (it = counters_.begin(); it != counters_.end(); it++) { + fprintf(output_stream_, "%" PRIu64 ",", (*it)->GetCount()); + } + fprintf(output_stream_, "\n"); + fflush(output_stream_); +} + + +void Instrument::DumpCounterNames() { + // Iterate through the counter objects, dumping the counter names to the + // output stream. 
+ std::list<Counter*>::const_iterator it; + for (it = counters_.begin(); it != counters_.end(); it++) { + fprintf(output_stream_, "%s,", (*it)->GetName()); + } + fprintf(output_stream_, "\n"); + fflush(output_stream_); +} + + +void Instrument::HandleInstrumentationEvent(unsigned event) { + switch (event) { + case InstrumentStateEnable: + Enable(); + break; + case InstrumentStateDisable: + Disable(); + break; + default: + DumpEventMarker(event); + } +} + + +void Instrument::DumpEventMarker(unsigned marker) { + // Dumpan event marker to the output stream as a specially formatted comment + // line. + static Counter* counter = GetCounter("Instruction"); + + fprintf(output_stream_, + "# %c%c @ %" PRId64 "\n", + marker & 0xff, + (marker >> 8) & 0xff, + counter->GetCount()); +} + + +Counter* Instrument::GetCounter(const char* name) { + // Get a Counter object by name from the counter list. + std::list<Counter*>::const_iterator it; + for (it = counters_.begin(); it != counters_.end(); it++) { + if (strcmp((*it)->GetName(), name) == 0) { + return *it; + } + } + + // A Counter by that name does not exist: print an error message to stderr + // and the output file, and exit. + static const char* error_message = + "# Error: Unknown counter \"%s\". 
Exiting.\n"; + fprintf(stderr, error_message, name); + fprintf(output_stream_, error_message, name); + exit(1); +} + + +void Instrument::Enable() { + std::list<Counter*>::iterator it; + for (it = counters_.begin(); it != counters_.end(); it++) { + (*it)->Enable(); + } +} + + +void Instrument::Disable() { + std::list<Counter*>::iterator it; + for (it = counters_.begin(); it != counters_.end(); it++) { + (*it)->Disable(); + } +} + + +void Instrument::VisitPCRelAddressing(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("PC Addressing"); + counter->Increment(); +} + + +void Instrument::VisitAddSubImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitLogicalImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Logical DP"); + counter->Increment(); +} + + +void Instrument::VisitMoveWideImmediate(const Instruction* instr) { + Update(); + static Counter* counter = GetCounter("Move Immediate"); + + if (instr->IsMovn() && (instr->GetRd() == kZeroRegCode)) { + unsigned imm = instr->GetImmMoveWide(); + HandleInstrumentationEvent(imm); + } else { + counter->Increment(); + } +} + + +void Instrument::VisitBitfield(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitExtract(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitUnconditionalBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Unconditional Branch"); + counter->Increment(); +} + + +void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Unconditional 
Branch"); + counter->Increment(); +} + + +void Instrument::VisitCompareBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Compare and Branch"); + counter->Increment(); +} + + +void Instrument::VisitTestBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Test and Branch"); + counter->Increment(); +} + + +void Instrument::VisitConditionalBranch(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Branch"); + counter->Increment(); +} + + +void Instrument::VisitSystem(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitException(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::InstrumentLoadStorePair(const Instruction* instr) { + static Counter* load_pair_counter = GetCounter("Load Pair"); + static Counter* store_pair_counter = GetCounter("Store Pair"); + + if (instr->Mask(LoadStorePairLBit) != 0) { + load_pair_counter->Increment(); + } else { + store_pair_counter->Increment(); + } +} + + +void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void Instrument::VisitLoadStorePairOffset(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) { + Update(); + InstrumentLoadStorePair(instr); +} + + +void Instrument::VisitLoadStoreExclusive(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitAtomicMemory(const Instruction* 
instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitLoadLiteral(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Load Literal"); + counter->Increment(); +} + + +void Instrument::VisitLoadStorePAC(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Load Integer"); + counter->Increment(); +} + + +void Instrument::InstrumentLoadStore(const Instruction* instr) { + static Counter* load_int_counter = GetCounter("Load Integer"); + static Counter* store_int_counter = GetCounter("Store Integer"); + static Counter* load_fp_counter = GetCounter("Load FP"); + static Counter* store_fp_counter = GetCounter("Store FP"); + + switch (instr->Mask(LoadStoreMask)) { + case STRB_w: + case STRH_w: + case STR_w: + VIXL_FALLTHROUGH(); + case STR_x: + store_int_counter->Increment(); + break; + case STR_s: + VIXL_FALLTHROUGH(); + case STR_d: + store_fp_counter->Increment(); + break; + case LDRB_w: + case LDRH_w: + case LDR_w: + case LDR_x: + case LDRSB_x: + case LDRSH_x: + case LDRSW_x: + case LDRSB_w: + VIXL_FALLTHROUGH(); + case LDRSH_w: + load_int_counter->Increment(); + break; + case LDR_s: + VIXL_FALLTHROUGH(); + case LDR_d: + load_fp_counter->Increment(); + break; + } +} + + +void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLoadStorePostIndex(const Instruction* instr) { + USE(instr); + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLoadStorePreIndex(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); +} + +void Instrument::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) { + Update(); + switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) { + case 
STLURB: + case STLURH: + case STLUR_w: + case STLUR_x: { + static Counter* counter = GetCounter("Store Integer"); + counter->Increment(); + break; + } + case LDAPURB: + case LDAPURSB_w: + case LDAPURSB_x: + case LDAPURH: + case LDAPURSH_w: + case LDAPURSH_x: + case LDAPUR_w: + case LDAPURSW: + case LDAPUR_x: { + static Counter* counter = GetCounter("Load Integer"); + counter->Increment(); + break; + } + } +} + + +void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) { + Update(); + InstrumentLoadStore(instr); +} + + +void Instrument::VisitLogicalShifted(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Logical DP"); + counter->Increment(); +} + + +void Instrument::VisitAddSubShifted(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitAddSubExtended(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitAddSubWithCarry(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Add/Sub DP"); + counter->Increment(); +} + + +void Instrument::VisitRotateRightIntoFlags(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitEvaluateIntoFlags(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitConditionalCompareRegister(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Compare"); + counter->Increment(); +} + + +void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Compare"); + counter->Increment(); +} + + 
+void Instrument::VisitConditionalSelect(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Select"); + counter->Increment(); +} + + +void Instrument::VisitDataProcessing1Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitDataProcessing2Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitDataProcessing3Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other Int DP"); + counter->Increment(); +} + + +void Instrument::VisitFPCompare(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPConditionalCompare(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Compare"); + counter->Increment(); +} + + +void Instrument::VisitFPConditionalSelect(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Conditional Select"); + counter->Increment(); +} + + +void Instrument::VisitFPImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); 
+} + + +void Instrument::VisitFPIntegerConvert(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitFPFixedPointConvert(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("FP DP"); + counter->Increment(); +} + + +void Instrument::VisitCrypto2RegSHA(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Crypto"); + counter->Increment(); +} + + +void Instrument::VisitCrypto3RegSHA(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Crypto"); + counter->Increment(); +} + + +void Instrument::VisitCryptoAES(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Crypto"); + counter->Increment(); +} + + +void Instrument::VisitNEON2RegMisc(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEON3Same(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEON3SameFP16(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEON3SameExtra(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEON3Different(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONAcrossLanes(const Instruction* instr) { + USE(instr); + Update(); + static 
Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONByIndexedElement(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONCopy(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONExtract(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONLoadStoreMultiStructPostIndex( + const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONLoadStoreSingleStructPostIndex( + const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar3Diff(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = 
GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar3Same(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarCopy(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarPairwise(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONShiftImmediate(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONTable(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitNEONPerm(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("NEON"); + counter->Increment(); +} + + +void Instrument::VisitReserved(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitUnallocated(const 
Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +void Instrument::VisitUnimplemented(const Instruction* instr) { + USE(instr); + Update(); + static Counter* counter = GetCounter("Other"); + counter->Increment(); +} + + +} // namespace aarch64 +} // namespace vixl diff --git a/src/aarch64/instrument-aarch64.h b/src/aarch64/instrument-aarch64.h new file mode 100644 index 00000000..4401b3ea --- /dev/null +++ b/src/aarch64/instrument-aarch64.h @@ -0,0 +1,117 @@ +// Copyright 2014, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_INSTRUMENT_AARCH64_H_ +#define VIXL_AARCH64_INSTRUMENT_AARCH64_H_ + +#include "../globals-vixl.h" +#include "../utils-vixl.h" + +#include "constants-aarch64.h" +#include "decoder-aarch64.h" +#include "instrument-aarch64.h" + +namespace vixl { +namespace aarch64 { + +const int kCounterNameMaxLength = 256; +const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22; + + +enum InstrumentState { InstrumentStateDisable = 0, InstrumentStateEnable = 1 }; + + +enum CounterType { + Gauge = 0, // Gauge counters reset themselves after reading. + Cumulative = 1 // Cumulative counters keep their value after reading. 
+}; + + +class Counter { + public: + explicit Counter(const char* name, CounterType type = Gauge); + + void Increment(); + void Enable(); + void Disable(); + bool IsEnabled(); + uint64_t GetCount(); + VIXL_DEPRECATED("GetCount", uint64_t count()) { return GetCount(); } + + const char* GetName(); + VIXL_DEPRECATED("GetName", const char* name()) { return GetName(); } + + CounterType GetType(); + VIXL_DEPRECATED("GetType", CounterType type()) { return GetType(); } + + private: + char name_[kCounterNameMaxLength]; + uint64_t count_; + bool enabled_; + CounterType type_; +}; + + +class Instrument : public DecoderVisitor { + public: + explicit Instrument( + const char* datafile = NULL, + uint64_t sample_period = kDefaultInstrumentationSamplingPeriod); + ~Instrument(); + + void Enable(); + void Disable(); + +// Declare all Visitor functions. +#define DECLARE(A) void Visit##A(const Instruction* instr) VIXL_OVERRIDE; + VISITOR_LIST(DECLARE) +#undef DECLARE + + private: + void Update(); + void DumpCounters(); + void DumpCounterNames(); + void DumpEventMarker(unsigned marker); + void HandleInstrumentationEvent(unsigned event); + Counter* GetCounter(const char* name); + + void InstrumentLoadStore(const Instruction* instr); + void InstrumentLoadStorePair(const Instruction* instr); + + std::list<Counter*> counters_; + + FILE* output_stream_; + + // Counter information is dumped every sample_period_ instructions decoded. + // For a sample_period_ = 0 a final counter value is only produced when the + // Instrumentation class is destroyed. 
+ uint64_t sample_period_; +}; + +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_AARCH64_INSTRUMENT_AARCH64_H_ diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc index cab02573..e7ede2f9 100644 --- a/src/aarch64/logic-aarch64.cc +++ b/src/aarch64/logic-aarch64.cc @@ -184,28 +184,14 @@ void Simulator::ld1(VectorFormat vform, } -void Simulator::ld1r(VectorFormat vform, - VectorFormat unpack_vform, - LogicVRegister dst, - uint64_t addr, - bool is_signed) { - unsigned unpack_size = LaneSizeInBitsFromFormat(unpack_vform); +void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (is_signed) { - dst.ReadIntFromMem(vform, unpack_size, i, addr); - } else { - dst.ReadUintFromMem(vform, unpack_size, i, addr); - } + dst.ReadUintFromMem(vform, i, addr); } } -void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { - ld1r(vform, vform, dst, addr); -} - - void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, @@ -564,7 +550,6 @@ LogicVRegister Simulator::add(VectorFormat vform, const LogicVRegister& src2) { int lane_size = LaneSizeInBitsFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { // Test for unsigned saturation. uint64_t ua = src1.UintLeftJustified(vform, i); @@ -583,40 +568,13 @@ LogicVRegister Simulator::add(VectorFormat vform, if ((pos_a == pos_b) && (pos_a != pos_r)) { dst.SetSignedSat(i, pos_a); } - dst.SetInt(vform, i, ur >> (64 - lane_size)); - } - return dst; -} - -LogicVRegister Simulator::add_uint(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - uint64_t value) { - int lane_size = LaneSizeInBitsFromFormat(vform); - VIXL_ASSERT(IsUintN(lane_size, value)); - dst.ClearForWrite(vform); - // Left-justify `value`. 
- uint64_t ub = value << (64 - lane_size); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - // Test for unsigned saturation. - uint64_t ua = src1.UintLeftJustified(vform, i); - uint64_t ur = ua + ub; - if (ur < ua) { - dst.SetUnsignedSat(i, true); - } - - // Test for signed saturation. - // `value` is always positive, so we have an overflow if the (signed) result - // is smaller than the first operand. - if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) { - dst.SetSignedSat(i, true); - } dst.SetInt(vform, i, ur >> (64 - lane_size)); } return dst; } + LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -628,68 +586,25 @@ LogicVRegister Simulator::addp(VectorFormat vform, return dst; } -LogicVRegister Simulator::sdiv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - int64_t val1 = src1.Int(vform, i); - int64_t val2 = src2.Int(vform, i); - int64_t min_int = (vform == kFormatVnD) ? 
kXMinInt : kWMinInt; - int64_t quotient = 0; - if ((val1 == min_int) && (val2 == -1)) { - quotient = min_int; - } else if (val2 != 0) { - quotient = val1 / val2; - } - dst.SetInt(vform, i, quotient); - } - - return dst; -} - -LogicVRegister Simulator::udiv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t val1 = src1.Uint(vform, i); - uint64_t val2 = src2.Uint(vform, i); - uint64_t quotient = 0; - if (val2 != 0) { - quotient = val1 / val2; - } - dst.SetUint(vform, i, quotient); - } - - return dst; -} - LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); - add(vform, dst, srca, temp); + add(vform, dst, dst, temp); return dst; } LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); - sub(vform, dst, srca, temp); + sub(vform, dst, dst, temp); return dst; } @@ -699,7 +614,6 @@ LogicVRegister Simulator::mul(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); } @@ -718,70 +632,6 @@ LogicVRegister Simulator::mul(VectorFormat vform, } -LogicVRegister Simulator::smulh(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - int64_t dst_val; - int64_t val1 = src1.Int(vform, i); - int64_t val2 = src2.Int(vform, i); - switch (LaneSizeInBitsFromFormat(vform)) { - case 8: - dst_val = internal::MultiplyHigh<8>(val1, 
val2); - break; - case 16: - dst_val = internal::MultiplyHigh<16>(val1, val2); - break; - case 32: - dst_val = internal::MultiplyHigh<32>(val1, val2); - break; - case 64: - dst_val = internal::MultiplyHigh<64>(val1, val2); - break; - default: - dst_val = 0xbadbeef; - VIXL_UNREACHABLE(); - break; - } - dst.SetInt(vform, i, dst_val); - } - return dst; -} - - -LogicVRegister Simulator::umulh(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t dst_val; - uint64_t val1 = src1.Uint(vform, i); - uint64_t val2 = src2.Uint(vform, i); - switch (LaneSizeInBitsFromFormat(vform)) { - case 8: - dst_val = internal::MultiplyHigh<8>(val1, val2); - break; - case 16: - dst_val = internal::MultiplyHigh<16>(val1, val2); - break; - case 32: - dst_val = internal::MultiplyHigh<32>(val1, val2); - break; - case 64: - dst_val = internal::MultiplyHigh<64>(val1, val2); - break; - default: - dst_val = 0xbadbeef; - VIXL_UNREACHABLE(); - break; - } - dst.SetUint(vform, i, dst_val); - } - return dst; -} - - LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -789,7 +639,7 @@ LogicVRegister Simulator::mla(VectorFormat vform, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatFillQ(vform); - return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index)); + return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); } @@ -800,7 +650,7 @@ LogicVRegister Simulator::mls(VectorFormat vform, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatFillQ(vform); - return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index)); + return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); } @@ -1048,14 +898,8 @@ LogicVRegister Simulator::sdot(VectorFormat vform, const LogicVRegister& src2, int index) { SimVRegister temp; - // NEON indexed `dot` allows the 
index value exceed the register size. - // Promote the format to Q-sized vector format before the duplication. - dup_elements_to_segments(IsSVEFormat(vform) ? vform - : VectorFormatFillQ(vform), - temp, - src2, - index); - return sdot(vform, dst, src1, temp); + VectorFormat indexform = VectorFormatFillQ(vform); + return sdot(vform, dst, src1, dup_element(indexform, temp, src2, index)); } @@ -1076,14 +920,8 @@ LogicVRegister Simulator::udot(VectorFormat vform, const LogicVRegister& src2, int index) { SimVRegister temp; - // NEON indexed `dot` allows the index value exceed the register size. - // Promote the format to Q-sized vector format before the duplication. - dup_elements_to_segments(IsSVEFormat(vform) ? vform - : VectorFormatFillQ(vform), - temp, - src2, - index); - return udot(vform, dst, src1, temp); + VectorFormat indexform = VectorFormatFillQ(vform); + return udot(vform, dst, src1, dup_element(indexform, temp, src2, index)); } @@ -1187,34 +1025,6 @@ LogicVRegister Simulator::sub(VectorFormat vform, return dst; } -LogicVRegister Simulator::sub_uint(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - uint64_t value) { - int lane_size = LaneSizeInBitsFromFormat(vform); - VIXL_ASSERT(IsUintN(lane_size, value)); - dst.ClearForWrite(vform); - // Left-justify `value`. - uint64_t ub = value << (64 - lane_size); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - // Test for unsigned saturation. - uint64_t ua = src1.UintLeftJustified(vform, i); - uint64_t ur = ua - ub; - if (ub > ua) { - dst.SetUnsignedSat(i, false); - } - - // Test for signed saturation. - // `value` is always positive, so we have an overflow if the (signed) result - // is greater than the first operand. 
- if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) { - dst.SetSignedSat(i, false); - } - - dst.SetInt(vform, i, ur >> (64 - lane_size)); - } - return dst; -} LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, @@ -1281,12 +1091,12 @@ LogicVRegister Simulator::bic(VectorFormat vform, const LogicVRegister& src, uint64_t imm) { uint64_t result[16]; - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count; ++i) { + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { result[i] = src.Uint(vform, i) & ~imm; } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -1488,13 +1298,10 @@ LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister Simulator::sminmaxv(VectorFormat vform, LogicVRegister dst, - const LogicPRegister& pg, const LogicVRegister& src, bool max) { int64_t dst_val = max ? INT64_MIN : INT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - int64_t src_val = src.Int(vform, i); if (max) { dst_val = (src_val > dst_val) ? 
src_val : dst_val; @@ -1511,35 +1318,15 @@ LogicVRegister Simulator::sminmaxv(VectorFormat vform, LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - sminmaxv(vform, dst, GetPTrue(), src, true); - return dst; -} - - -LogicVRegister Simulator::sminv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src) { - sminmaxv(vform, dst, GetPTrue(), src, false); - return dst; -} - - -LogicVRegister Simulator::smaxv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - sminmaxv(vform, dst, pg, src, true); + sminmaxv(vform, dst, src, true); return dst; } LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, - const LogicPRegister& pg, const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - sminmaxv(vform, dst, pg, src, false); + sminmaxv(vform, dst, src, false); return dst; } @@ -1627,13 +1414,10 @@ LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister Simulator::uminmaxv(VectorFormat vform, LogicVRegister dst, - const LogicPRegister& pg, const LogicVRegister& src, bool max) { uint64_t dst_val = max ? 0 : UINT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - uint64_t src_val = src.Uint(vform, i); if (max) { dst_val = (src_val > dst_val) ? 
src_val : dst_val; @@ -1650,7 +1434,7 @@ LogicVRegister Simulator::uminmaxv(VectorFormat vform, LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uminmaxv(vform, dst, GetPTrue(), src, true); + uminmaxv(vform, dst, src, true); return dst; } @@ -1658,27 +1442,7 @@ LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uminmaxv(vform, dst, GetPTrue(), src, false); - return dst; -} - - -LogicVRegister Simulator::umaxv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - uminmaxv(vform, dst, pg, src, true); - return dst; -} - - -LogicVRegister Simulator::uminv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - uminmaxv(vform, dst, pg, src, false); + uminmaxv(vform, dst, src, false); return dst; } @@ -1757,104 +1521,14 @@ LogicVRegister Simulator::ushll2(VectorFormat vform, return ushl(vform, dst, extendedreg, shiftreg); } -std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform, - const LogicPRegister& pg, - const LogicVRegister& src, - int offset_from_last_active) { - // Untested for any other values. 
- VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1)); - - int last_active = GetLastActive(vform, pg); - int lane_count = LaneCountFromFormat(vform); - int index = - ((last_active + offset_from_last_active) + lane_count) % lane_count; - return std::make_pair(last_active >= 0, src.Uint(vform, index)); -} - -LogicVRegister Simulator::compact(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - int j = 0; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (pg.IsActive(vform, i)) { - dst.SetUint(vform, j++, src.Uint(vform, i)); - } - } - for (; j < LaneCountFromFormat(vform); j++) { - dst.SetUint(vform, j, 0); - } - return dst; -} - -LogicVRegister Simulator::splice(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src1, - const LogicVRegister& src2) { - int lane_count = LaneCountFromFormat(vform); - int first_active = GetFirstActive(vform, pg); - int last_active = GetLastActive(vform, pg); - int dst_idx = 0; - uint64_t result[kZRegMaxSizeInBytes]; - - if (first_active >= 0) { - VIXL_ASSERT(last_active >= first_active); - VIXL_ASSERT(last_active < lane_count); - for (int i = first_active; i <= last_active; i++) { - result[dst_idx++] = src1.Uint(vform, i); - } - } - - VIXL_ASSERT(dst_idx <= lane_count); - for (int i = dst_idx; i < lane_count; i++) { - result[i] = src2.Uint(vform, i - dst_idx); - } - - for (int i = 0; i < lane_count; i++) { - dst.SetUint(vform, i, result[i]); - } - return dst; -} - -LogicVRegister Simulator::sel(VectorFormat vform, - LogicVRegister dst, - const SimPRegister& pg, - const LogicVRegister& src1, - const LogicVRegister& src2) { - int p_reg_bits_per_lane = - LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit; - for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { - uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane) - ? 
src1.Uint(vform, lane) - : src2.Uint(vform, lane); - dst.SetUint(vform, lane, lane_value); - } - return dst; -} - - -LogicPRegister Simulator::sel(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src1, - const LogicPRegister& src2) { - for (int i = 0; i < dst.GetChunkCount(); i++) { - LogicPRegister::ChunkType mask = pg.GetChunk(i); - LogicPRegister::ChunkType result = - (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i)); - dst.SetChunk(i, result); - } - return dst; -} - LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count; i++) { + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted = src_lane << shift; @@ -1903,10 +1577,10 @@ LogicVRegister Simulator::sri(VectorFormat vform, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); - int lane_count = LaneCountFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); VIXL_ASSERT((shift > 0) && (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); - for (int i = 0; i < lane_count; i++) { + for (int i = 0; i < laneCount; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted; @@ -1989,18 +1663,15 @@ LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); - int lane_count = LaneCountFromFormat(vform); - - // Ensure that we can store one result per lane. 
- int result[kZRegMaxSizeInBytes]; - - for (int i = 0; i < lane_count; i++) { - result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits); + uint64_t result[16]; + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { + result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2010,51 +1681,38 @@ LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); - int lane_count = LaneCountFromFormat(vform); - - // Ensure that we can store one result per lane. - int result[kZRegMaxSizeInBytes]; - - for (int i = 0; i < lane_count; i++) { - result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits); + uint64_t result[16]; + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { + result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } -LogicVRegister Simulator::cnot(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src) { - dst.ClearForWrite(vform); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0; - dst.SetUint(vform, i, value); - } - return dst; -} - - LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); - int lane_count = LaneCountFromFormat(vform); - - // Ensure that we can store one result per lane. 
- int result[kZRegMaxSizeInBytes]; - - for (int i = 0; i < lane_count; i++) { - result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits); + uint64_t result[16]; + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; i++) { + uint64_t value = src.Uint(vform, i); + result[i] = 0; + for (int j = 0; j < laneSizeInBits; j++) { + result[i] += (value & 1); + value >>= 1; + } } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2238,108 +1896,11 @@ LogicVRegister Simulator::abs(VectorFormat vform, } -LogicVRegister Simulator::andv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform)); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - result &= src.Uint(vform, i); - } - VectorFormat vform_dst = - ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); - dst.ClearForWrite(vform_dst); - dst.SetUint(vform_dst, 0, result); - return dst; -} - - -LogicVRegister Simulator::eorv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - uint64_t result = 0; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - result ^= src.Uint(vform, i); - } - VectorFormat vform_dst = - ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); - dst.ClearForWrite(vform_dst); - dst.SetUint(vform_dst, 0, result); - return dst; -} - - -LogicVRegister Simulator::orv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - uint64_t result = 0; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if 
(!pg.IsActive(vform, i)) continue; - - result |= src.Uint(vform, i); - } - VectorFormat vform_dst = - ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); - dst.ClearForWrite(vform_dst); - dst.SetUint(vform_dst, 0, result); - return dst; -} - - -LogicVRegister Simulator::saddv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize); - int64_t result = 0; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - // The destination register always has D-lane sizes and the source register - // always has S-lanes or smaller, so signed integer overflow -- undefined - // behaviour -- can't occur. - result += src.Int(vform, i); - } - - dst.ClearForWrite(kFormatD); - dst.SetInt(kFormatD, 0, result); - return dst; -} - - -LogicVRegister Simulator::uaddv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - uint64_t result = 0; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - result += src.Uint(vform, i); - } - - dst.ClearForWrite(kFormatD); - dst.SetUint(kFormatD, 0, result); - return dst; -} - - LogicVRegister Simulator::extractnarrow(VectorFormat dstform, LogicVRegister dst, - bool dst_is_signed, + bool dstIsSigned, const LogicVRegister& src, - bool src_is_signed) { + bool srcIsSigned) { bool upperhalf = false; VectorFormat srcform = kFormatUndefined; int64_t ssrc[8]; @@ -2408,7 +1969,7 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform, } // Test for unsigned saturation - if (src_is_signed) { + if (srcIsSigned) { if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { dst.SetUnsignedSat(offset + i, true); } else if (ssrc[i] < 0) { @@ -2421,13 +1982,13 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform, } int64_t 
result; - if (src_is_signed) { + if (srcIsSigned) { result = ssrc[i] & MaxUintFromFormat(dstform); } else { result = usrc[i] & MaxUintFromFormat(dstform); } - if (dst_is_signed) { + if (dstIsSigned) { dst.SetInt(dstform, offset + i, result); } else { dst.SetUint(dstform, offset + i, result); @@ -2469,17 +2030,17 @@ LogicVRegister Simulator::absdiff(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - bool is_signed) { + bool issigned) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i)) - : (src1.Uint(vform, i) > src2.Uint(vform, i)); - // Always calculate the answer using unsigned arithmetic, to avoid - // implemenation-defined signed overflow. - if (src1_gt_src2) { - dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i)); + if (issigned) { + int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); + sr = sr > 0 ? sr : -sr; + dst.SetInt(vform, i, sr); } else { - dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i)); + int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); + sr = sr > 0 ? 
sr : -sr; + dst.SetUint(vform, i, sr); } } return dst; @@ -2524,15 +2085,15 @@ LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[kZRegMaxSizeInBytes]; - int lane_count = LaneCountFromFormat(vform); - int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int laneSizeInBits = LaneSizeInBitsFromFormat(vform); uint64_t reversed_value; uint64_t value; - for (int i = 0; i < lane_count; i++) { + for (int i = 0; i < laneCount; i++) { value = src.Uint(vform, i); reversed_value = 0; - for (int j = 0; j < lane_size_in_bits; j++) { + for (int j = 0; j < laneSizeInBits; j++) { reversed_value = (reversed_value << 1) | (value & 1); value >>= 1; } @@ -2540,7 +2101,7 @@ LogicVRegister Simulator::rbit(VectorFormat vform, } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2549,33 +2110,19 @@ LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src) { - VIXL_ASSERT(IsSVEFormat(vform)); - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count / 2; i++) { - uint64_t t = src.Uint(vform, i); - dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1)); - dst.SetUint(vform, lane_count - i - 1, t); - } - return dst; -} - - -LogicVRegister Simulator::rev_byte(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - int rev_size) { - uint64_t result[kZRegMaxSizeInBytes]; - int lane_count = LaneCountFromFormat(vform); - int lane_size = LaneSizeInBytesFromFormat(vform); - int lanes_per_loop = rev_size / lane_size; - for (int i = 0; i < lane_count; i += lanes_per_loop) { - for (int j = 0; j < lanes_per_loop; j++) { - result[i + lanes_per_loop - 1 - j] = src.Uint(vform, 
i + j); + const LogicVRegister& src, + int revSize) { + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int laneSize = LaneSizeInBytesFromFormat(vform); + int lanesPerLoop = revSize / laneSize; + for (int i = 0; i < laneCount; i += lanesPerLoop) { + for (int j = 0; j < lanesPerLoop; j++) { + result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); } } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2585,21 +2132,21 @@ LogicVRegister Simulator::rev_byte(VectorFormat vform, LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev_byte(vform, dst, src, 2); + return rev(vform, dst, src, 2); } LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev_byte(vform, dst, src, 4); + return rev(vform, dst, src, 4); } LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev_byte(vform, dst, src, 8); + return rev(vform, dst, src, 8); } @@ -2668,60 +2215,22 @@ LogicVRegister Simulator::ext(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2, int index) { - uint8_t result[kZRegMaxSizeInBytes]; - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count - index; ++i) { + uint8_t result[16]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount - index; ++i) { result[i] = src1.Uint(vform, i + index); } for (int i = 0; i < index; ++i) { - result[lane_count - index + i] = src2.Uint(vform, i); + result[laneCount - index + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } template <typename T> -LogicVRegister Simulator::fadda(VectorFormat vform, - LogicVRegister acc, - const 
LogicPRegister& pg, - const LogicVRegister& src) { - T result = acc.Float<T>(0); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - result = FPAdd(result, src.Float<T>(i)); - } - VectorFormat vform_dst = - ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); - acc.ClearForWrite(vform_dst); - acc.SetFloat(0, result); - return acc; -} - -LogicVRegister Simulator::fadda(VectorFormat vform, - LogicVRegister acc, - const LogicPRegister& pg, - const LogicVRegister& src) { - switch (LaneSizeInBitsFromFormat(vform)) { - case kHRegSize: - fadda<SimFloat16>(vform, acc, pg, src); - break; - case kSRegSize: - fadda<float>(vform, acc, pg, src); - break; - case kDRegSize: - fadda<double>(vform, acc, pg, src); - break; - default: - VIXL_UNREACHABLE(); - } - return acc; -} - -template <typename T> LogicVRegister Simulator::fcadd(VectorFormat vform, LogicVRegister dst, // d const LogicVRegister& src1, // n @@ -2764,7 +2273,7 @@ LogicVRegister Simulator::fcadd(VectorFormat vform, const LogicVRegister& src2, // m int rot) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fcadd<SimFloat16>(vform, dst, src1, src2, rot); + VIXL_UNIMPLEMENTED(); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fcadd<float>(vform, dst, src1, src2, rot); } else { @@ -2774,12 +2283,12 @@ LogicVRegister Simulator::fcadd(VectorFormat vform, return dst; } + template <typename T> LogicVRegister Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - const LogicVRegister& acc, + LogicVRegister dst, // d + const LogicVRegister& src1, // n + const LogicVRegister& src2, // m int index, int rot) { int elements = LaneCountFromFormat(vform); @@ -2792,33 +2301,83 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) for (int e = 0; e <= (elements / 2) - 1; e++) { - // Index == -1 indicates a vector/vector 
rather than vector/indexed-element - // operation. - int f = (index < 0) ? e : index; + switch (rot) { + case 0: + element1 = src2.Float<T>(index * 2); + element2 = src1.Float<T>(e * 2); + element3 = src2.Float<T>(index * 2 + 1); + element4 = src1.Float<T>(e * 2); + break; + case 90: + element1 = FPNeg(src2.Float<T>(index * 2 + 1)); + element2 = src1.Float<T>(e * 2 + 1); + element3 = src2.Float<T>(index * 2); + element4 = src1.Float<T>(e * 2 + 1); + break; + case 180: + element1 = FPNeg(src2.Float<T>(index * 2)); + element2 = src1.Float<T>(e * 2); + element3 = FPNeg(src2.Float<T>(index * 2 + 1)); + element4 = src1.Float<T>(e * 2); + break; + case 270: + element1 = src2.Float<T>(index * 2 + 1); + element2 = src1.Float<T>(e * 2 + 1); + element3 = FPNeg(src2.Float<T>(index * 2)); + element4 = src1.Float<T>(e * 2 + 1); + break; + default: + VIXL_UNREACHABLE(); + return dst; // prevents "element(n) may be unintialized" errors + } + dst.ClearForWrite(vform); + dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); + dst.SetFloat<T>(e * 2 + 1, + FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); + } + return dst; +} + + +template <typename T> +LogicVRegister Simulator::fcmla(VectorFormat vform, + LogicVRegister dst, // d + const LogicVRegister& src1, // n + const LogicVRegister& src2, // m + int rot) { + int elements = LaneCountFromFormat(vform); + + T element1, element2, element3, element4; + rot *= 90; + + // Loop example: + // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) + // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) + for (int e = 0; e <= (elements / 2) - 1; e++) { switch (rot) { case 0: - element1 = src2.Float<T>(f * 2); + element1 = src2.Float<T>(e * 2); element2 = src1.Float<T>(e * 2); - element3 = src2.Float<T>(f * 2 + 1); + element3 = src2.Float<T>(e * 2 + 1); element4 = src1.Float<T>(e * 2); break; case 90: - element1 = FPNeg(src2.Float<T>(f * 2 + 1)); + element1 = 
FPNeg(src2.Float<T>(e * 2 + 1)); element2 = src1.Float<T>(e * 2 + 1); - element3 = src2.Float<T>(f * 2); + element3 = src2.Float<T>(e * 2); element4 = src1.Float<T>(e * 2 + 1); break; case 180: - element1 = FPNeg(src2.Float<T>(f * 2)); + element1 = FPNeg(src2.Float<T>(e * 2)); element2 = src1.Float<T>(e * 2); - element3 = FPNeg(src2.Float<T>(f * 2 + 1)); + element3 = FPNeg(src2.Float<T>(e * 2 + 1)); element4 = src1.Float<T>(e * 2); break; case 270: - element1 = src2.Float<T>(f * 2 + 1); + element1 = src2.Float<T>(e * 2 + 1); element2 = src1.Float<T>(e * 2 + 1); - element3 = FPNeg(src2.Float<T>(f * 2)); + element3 = FPNeg(src2.Float<T>(e * 2)); element4 = src1.Float<T>(e * 2 + 1); break; default: @@ -2826,28 +2385,25 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, return dst; // prevents "element(n) may be unintialized" errors } dst.ClearForWrite(vform); - dst.SetFloat<T>(vform, - e * 2, - FPMulAdd(acc.Float<T>(e * 2), element2, element1)); - dst.SetFloat<T>(vform, - e * 2 + 1, - FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3)); + dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); + dst.SetFloat<T>(e * 2 + 1, + FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); } return dst; } + LogicVRegister Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - const LogicVRegister& acc, + LogicVRegister dst, // d + const LogicVRegister& src1, // n + const LogicVRegister& src2, // m int rot) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot); + VIXL_UNIMPLEMENTED(); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fcmla<float>(vform, dst, src1, src2, acc, -1, rot); + fcmla<float>(vform, dst, src1, src2, rot); } else { - fcmla<double>(vform, dst, src1, src2, acc, -1, rot); + fcmla<double>(vform, dst, src1, src2, rot); } return dst; } @@ -2862,9 +2418,9 @@ LogicVRegister Simulator::fcmla(VectorFormat 
vform, if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { VIXL_UNIMPLEMENTED(); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fcmla<float>(vform, dst, src1, src2, dst, index, rot); + fcmla<float>(vform, dst, src1, src2, index, rot); } else { - fcmla<double>(vform, dst, src1, src2, dst, index, rot); + fcmla<double>(vform, dst, src1, src2, index, rot); } return dst; } @@ -2874,59 +2430,23 @@ LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index) { - if (vform == kFormatVnQ) { - // When duplicating a 128-bit value, split it into two 64-bit parts, and - // then copy the two to their slots on destination register. - uint64_t low = src.Uint(kFormatVnD, src_index * 2); - uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1); - dst.ClearForWrite(vform); - for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD); - d_lane += 2) { - dst.SetUint(kFormatVnD, d_lane, low); - dst.SetUint(kFormatVnD, d_lane + 1, high); - } - } else { - int lane_count = LaneCountFromFormat(vform); - uint64_t value = src.Uint(vform, src_index); - dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { - dst.SetUint(vform, i, value); - } - } - return dst; -} - -LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - int src_index) { - // In SVE, a segment is a 128-bit portion of a vector, like a Q register, - // whereas in NEON, the size of segment is equal to the size of register - // itself. 
- int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform)); - VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform))); - int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform); - - VIXL_ASSERT(src_index >= 0); - VIXL_ASSERT(src_index < lanes_per_segment); - + int laneCount = LaneCountFromFormat(vform); + uint64_t value = src.Uint(vform, src_index); dst.ClearForWrite(vform); - for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) { - uint64_t value = src.Uint(vform, j + src_index); - for (int i = 0; i < lanes_per_segment; i++) { - dst.SetUint(vform, j + i, value); - } + for (int i = 0; i < laneCount; ++i) { + dst.SetUint(vform, i, value); } return dst; } + LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst, uint64_t imm) { - int lane_count = LaneCountFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); uint64_t value = imm & MaxUintFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, value); } return dst; @@ -2953,93 +2473,12 @@ LogicVRegister Simulator::ins_immediate(VectorFormat vform, } -LogicVRegister Simulator::index(VectorFormat vform, - LogicVRegister dst, - uint64_t start, - uint64_t step) { - VIXL_ASSERT(IsSVEFormat(vform)); - uint64_t value = start; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.SetUint(vform, i, value); - value += step; - } - return dst; -} - - -LogicVRegister Simulator::insr(VectorFormat vform, - LogicVRegister dst, - uint64_t imm) { - VIXL_ASSERT(IsSVEFormat(vform)); - for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) { - dst.SetUint(vform, i, dst.Uint(vform, i - 1)); - } - dst.SetUint(vform, 0, imm); - return dst; -} - - -LogicVRegister Simulator::mov(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src) { - dst.ClearForWrite(vform); - for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) 
{ - dst.SetUint(vform, lane, src.Uint(vform, lane)); - } - return dst; -} - - -LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) { - // Avoid a copy if the registers already alias. - if (dst.Aliases(src)) return dst; - - for (int i = 0; i < dst.GetChunkCount(); i++) { - dst.SetChunk(i, src.GetChunk(i)); - } - return dst; -} - - -LogicVRegister Simulator::mov_merging(VectorFormat vform, - LogicVRegister dst, - const SimPRegister& pg, - const LogicVRegister& src) { - return sel(vform, dst, pg, src, dst); -} - - -LogicVRegister Simulator::mov_zeroing(VectorFormat vform, - LogicVRegister dst, - const SimPRegister& pg, - const LogicVRegister& src) { - SimVRegister zero; - dup_immediate(vform, zero, 0); - return sel(vform, dst, pg, src, zero); -} - - -LogicPRegister Simulator::mov_merging(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src) { - return sel(dst, pg, src, dst); -} - - -LogicPRegister Simulator::mov_zeroing(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src) { - SimPRegister all_false; - return sel(dst, pg, src, pfalse(all_false)); -} - - LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, uint64_t imm) { - int lane_count = LaneCountFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, imm); } return dst; @@ -3049,9 +2488,9 @@ LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm) { - int lane_count = LaneCountFromFormat(vform); + int laneCount = LaneCountFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, ~imm); } return dst; @@ -3063,12 +2502,12 @@ LogicVRegister Simulator::orr(VectorFormat vform, const LogicVRegister& src, uint64_t imm) { uint64_t 
result[16]; - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count; ++i) { + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { result[i] = src.Uint(vform, i) | imm; } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3129,37 +2568,6 @@ LogicVRegister Simulator::sxtl2(VectorFormat vform, } -LogicVRegister Simulator::uxt(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - unsigned from_size_in_bits) { - int lane_count = LaneCountFromFormat(vform); - uint64_t mask = GetUintMask(from_size_in_bits); - - dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; i++) { - dst.SetInt(vform, i, src.Uint(vform, i) & mask); - } - return dst; -} - - -LogicVRegister Simulator::sxt(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - unsigned from_size_in_bits) { - int lane_count = LaneCountFromFormat(vform); - - dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; i++) { - uint64_t value = - ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i)); - dst.SetInt(vform, i, value); - } - return dst; -} - - LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, @@ -3207,22 +2615,6 @@ LogicVRegister Simulator::rshrn2(VectorFormat vform, return extractnarrow(vformdst, dst, false, shifted_src, false); } -LogicVRegister Simulator::Table(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& tab, - const LogicVRegister& ind) { - VIXL_ASSERT(IsSVEFormat(vform)); - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count; i++) { - uint64_t index = ind.Uint(vform, i); - uint64_t value = (index >= static_cast<uint64_t>(lane_count)) - ? 
0 - : tab.Uint(vform, static_cast<int>(index)); - dst.SetUint(vform, i, value); - } - return dst; -} - LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, @@ -3790,7 +3182,7 @@ LogicVRegister Simulator::umlsl(VectorFormat vform, SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); - mls(vform, dst, dst, temp1, temp2); + mls(vform, dst, temp1, temp2); return dst; } @@ -3802,7 +3194,7 @@ LogicVRegister Simulator::umlsl2(VectorFormat vform, SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); - mls(vform, dst, dst, temp1, temp2); + mls(vform, dst, temp1, temp2); return dst; } @@ -3814,7 +3206,7 @@ LogicVRegister Simulator::smlsl(VectorFormat vform, SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); - mls(vform, dst, dst, temp1, temp2); + mls(vform, dst, temp1, temp2); return dst; } @@ -3826,7 +3218,7 @@ LogicVRegister Simulator::smlsl2(VectorFormat vform, SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); - mls(vform, dst, dst, temp1, temp2); + mls(vform, dst, temp1, temp2); return dst; } @@ -3838,7 +3230,7 @@ LogicVRegister Simulator::umlal(VectorFormat vform, SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); - mla(vform, dst, dst, temp1, temp2); + mla(vform, dst, temp1, temp2); return dst; } @@ -3850,7 +3242,7 @@ LogicVRegister Simulator::umlal2(VectorFormat vform, SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); - mla(vform, dst, dst, temp1, temp2); + mla(vform, dst, temp1, temp2); return dst; } @@ -3862,7 +3254,7 @@ LogicVRegister Simulator::smlal(VectorFormat vform, SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); - mla(vform, dst, dst, temp1, temp2); + mla(vform, dst, temp1, temp2); return dst; } @@ -3874,7 +3266,7 @@ LogicVRegister Simulator::smlal2(VectorFormat vform, SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, 
src2); - mla(vform, dst, dst, temp1, temp2); + mla(vform, dst, temp1, temp2); return dst; } @@ -3979,7 +3371,7 @@ LogicVRegister Simulator::dot(VectorFormat vform, dst.ClearForWrite(vform); for (int e = 0; e < LaneCountFromFormat(vform); e++) { - uint64_t result = 0; + int64_t result = 0; int64_t element1, element2; for (int i = 0; i < 4; i++) { int index = 4 * e + i; @@ -3992,7 +3384,9 @@ LogicVRegister Simulator::dot(VectorFormat vform, } result += element1 * element2; } - dst.SetUint(vform, e, result + dst.Uint(vform, e)); + + result += dst.Int(vform, e); + dst.SetInt(vform, e, result); } return dst; } @@ -4170,16 +3564,16 @@ LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; - int lane_count = LaneCountFromFormat(vform); - int pairs = lane_count / 2; + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, 2 * i); result[(2 * i) + 1] = src2.Uint(vform, 2 * i); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -4190,16 +3584,16 @@ LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; - int lane_count = LaneCountFromFormat(vform); - int pairs = lane_count / 2; + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, (2 * i) + 1); result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -4210,16 +3604,16 @@ LogicVRegister Simulator::zip1(VectorFormat vform, 
LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; - int lane_count = LaneCountFromFormat(vform); - int pairs = lane_count / 2; + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, i); result[(2 * i) + 1] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -4230,16 +3624,16 @@ LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes]; - int lane_count = LaneCountFromFormat(vform); - int pairs = lane_count / 2; + uint64_t result[16]; + int laneCount = LaneCountFromFormat(vform); + int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, pairs + i); result[(2 * i) + 1] = src2.Uint(vform, pairs + i); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -4250,15 +3644,15 @@ LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes * 2]; - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count; ++i) { + uint64_t result[32]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { result[i] = src1.Uint(vform, i); - result[lane_count + i] = src2.Uint(vform, i); + result[laneCount + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[2 * i]); } return dst; @@ -4269,15 +3663,15 @@ LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, 
const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[kZRegMaxSizeInBytes * 2]; - int lane_count = LaneCountFromFormat(vform); - for (int i = 0; i < lane_count; ++i) { + uint64_t result[32]; + int laneCount = LaneCountFromFormat(vform); + for (int i = 0; i < laneCount; ++i) { result[i] = src1.Uint(vform, i); - result[lane_count + i] = src2.Uint(vform, i); + result[laneCount + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < lane_count; ++i) { + for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[(2 * i) + 1]); } return dst; @@ -4807,7 +4201,7 @@ uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { } else { \ result = OP(op1, op2); \ } \ - dst.SetFloat(vform, i, result); \ + dst.SetFloat(i, result); \ } \ return dst; \ } \ @@ -4850,7 +4244,7 @@ LogicVRegister Simulator::frecps(VectorFormat vform, T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); T result = FPProcessNaNs(op1, op2); - dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2)); + dst.SetFloat(i, IsNaN(result) ? result : FPRecipStepFused(op1, op2)); } return dst; } @@ -4882,7 +4276,7 @@ LogicVRegister Simulator::frsqrts(VectorFormat vform, T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); T result = FPProcessNaNs(op1, op2); - dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2)); + dst.SetFloat(i, IsNaN(result) ? 
result : FPRSqrtStepFused(op1, op2)); } return dst; } @@ -4915,41 +4309,29 @@ LogicVRegister Simulator::fcmp(VectorFormat vform, bool result = false; T op1 = src1.Float<T>(i); T op2 = src2.Float<T>(i); - bool unordered = IsNaN(FPProcessNaNs(op1, op2)); - - switch (cond) { - case eq: - result = (op1 == op2); - break; - case ge: - result = (op1 >= op2); - break; - case gt: - result = (op1 > op2); - break; - case le: - result = (op1 <= op2); - break; - case lt: - result = (op1 < op2); - break; - case ne: - result = (op1 != op2); - break; - case uo: - result = unordered; - break; - default: - // Other conditions are defined in terms of those above. - VIXL_UNREACHABLE(); - break; - } - - if (result && unordered) { - // Only `uo` and `ne` can be true for unordered comparisons. - VIXL_ASSERT((cond == uo) || (cond == ne)); + T nan_result = FPProcessNaNs(op1, op2); + if (!IsNaN(nan_result)) { + switch (cond) { + case eq: + result = (op1 == op2); + break; + case ge: + result = (op1 >= op2); + break; + case gt: + result = (op1 > op2); + break; + case le: + result = (op1 <= op2); + break; + case lt: + result = (op1 < op2); + break; + default: + VIXL_UNREACHABLE(); + break; + } } - dst.SetUint(vform, i, result ? 
MaxUintFromFormat(vform) : 0); } return dst; @@ -5021,16 +4403,15 @@ LogicVRegister Simulator::fabscmp(VectorFormat vform, template <typename T> LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = src1.Float<T>(i); T op2 = src2.Float<T>(i); - T acc = srca.Float<T>(i); + T acc = dst.Float<T>(i); T result = FPMulAdd(acc, op1, op2); - dst.SetFloat(vform, i, result); + dst.SetFloat(i, result); } return dst; } @@ -5038,16 +4419,15 @@ LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fmla<SimFloat16>(vform, dst, srca, src1, src2); + fmla<SimFloat16>(vform, dst, src1, src2); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fmla<float>(vform, dst, srca, src1, src2); + fmla<float>(vform, dst, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - fmla<double>(vform, dst, srca, src1, src2); + fmla<double>(vform, dst, src1, src2); } return dst; } @@ -5056,14 +4436,13 @@ LogicVRegister Simulator::fmla(VectorFormat vform, template <typename T> LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); - T acc = srca.Float<T>(i); + T acc = dst.Float<T>(i); T result = FPMulAdd(acc, op1, op2); dst.SetFloat(i, result); } @@ -5073,16 +4452,15 @@ LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const 
LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fmls<SimFloat16>(vform, dst, srca, src1, src2); + fmls<SimFloat16>(vform, dst, src1, src2); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fmls<float>(vform, dst, srca, src1, src2); + fmls<float>(vform, dst, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - fmls<double>(vform, dst, srca, src1, src2); + fmls<double>(vform, dst, src1, src2); } return dst; } @@ -5362,131 +4740,75 @@ NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) #undef DEFINE_NEON_FP_PAIR_OP template <typename T> -LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPPairOp<T>::type fn, - uint64_t inactive_value) { - int lane_count = LaneCountFromFormat(vform); - T result[kZRegMaxSizeInBytes / sizeof(T)]; - // Copy the source vector into a working array. Initialise the unused elements - // at the end of the array to the same value that a false predicate would set. - for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) { - result[i] = (i < lane_count) - ? src.Float<T>(i) - : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value); - } - - // Pairwise reduce the elements to a single value, using the pair op function - // argument. 
- for (int step = 1; step < lane_count; step *= 2) { - for (int i = 0; i < lane_count; i += step * 2) { - result[i] = (this->*fn)(result[i], result[i + step]); - } - } +LogicVRegister Simulator::fminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPMinMaxOp<T>::type Op) { + VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) || + (vform == kFormat4S)); + USE(vform); + T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1)); + T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3)); + if (vform == kFormat8H) { + T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5)); + T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7)); + result1 = (this->*Op)(result1, result3); + result2 = (this->*Op)(result2, result4); + } + T result = (this->*Op)(result1, result2); dst.ClearForWrite(ScalarFormatFromFormat(vform)); - dst.SetFloat<T>(0, result[0]); - return dst; -} - -LogicVRegister Simulator::FPPairedAcrossHelper( - VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPPairOp<SimFloat16>::type fn16, - typename TFPPairOp<float>::type fn32, - typename TFPPairOp<double>::type fn64, - uint64_t inactive_value) { - switch (LaneSizeInBitsFromFormat(vform)) { - case kHRegSize: - return FPPairedAcrossHelper<SimFloat16>(vform, - dst, - src, - fn16, - inactive_value); - case kSRegSize: - return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value); - default: - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - return FPPairedAcrossHelper<double>(vform, - dst, - src, - fn64, - inactive_value); - } + dst.SetFloat<T>(0, result); + return dst; } -LogicVRegister Simulator::faddv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src) { - return FPPairedAcrossHelper(vform, - dst, - src, - &Simulator::FPAdd<SimFloat16>, - &Simulator::FPAdd<float>, - &Simulator::FPAdd<double>, - 0); -} LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst, const 
LogicVRegister& src) { - int lane_size = LaneSizeInBitsFromFormat(vform); - uint64_t inactive_value = - FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); - return FPPairedAcrossHelper(vform, - dst, - src, - &Simulator::FPMax<SimFloat16>, - &Simulator::FPMax<float>, - &Simulator::FPMax<double>, - inactive_value); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>); + } else { + return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>); + } } LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - int lane_size = LaneSizeInBitsFromFormat(vform); - uint64_t inactive_value = - FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity); - return FPPairedAcrossHelper(vform, - dst, - src, - &Simulator::FPMin<SimFloat16>, - &Simulator::FPMin<float>, - &Simulator::FPMin<double>, - inactive_value); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>); + } else { + return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>); + } } LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - int lane_size = LaneSizeInBitsFromFormat(vform); - uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); - return FPPairedAcrossHelper(vform, - dst, - src, - &Simulator::FPMaxNM<SimFloat16>, - &Simulator::FPMaxNM<float>, - &Simulator::FPMaxNM<double>, - inactive_value); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + return fminmaxv<SimFloat16>(vform, + dst, + src, + &Simulator::FPMaxNM<SimFloat16>); + } else { + return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>); + } } LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - int lane_size = LaneSizeInBitsFromFormat(vform); - uint64_t inactive_value = FPToRawbitsWithSize(lane_size, 
kFP64DefaultNaN); - return FPPairedAcrossHelper(vform, - dst, - src, - &Simulator::FPMinNM<SimFloat16>, - &Simulator::FPMinNM<float>, - &Simulator::FPMinNM<double>, - inactive_value); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + return fminmaxv<SimFloat16>(vform, + dst, + src, + &Simulator::FPMinNM<SimFloat16>); + } else { + return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>); + } } @@ -5521,14 +4843,14 @@ LogicVRegister Simulator::fmla(VectorFormat vform, SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); - fmla<SimFloat16>(vform, dst, dst, src1, index_reg); + fmla<SimFloat16>(vform, dst, src1, index_reg); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); - fmla<float>(vform, dst, dst, src1, index_reg); + fmla<float>(vform, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); - fmla<double>(vform, dst, dst, src1, index_reg); + fmla<double>(vform, dst, src1, index_reg); } return dst; } @@ -5543,14 +4865,14 @@ LogicVRegister Simulator::fmls(VectorFormat vform, SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); - fmls<SimFloat16>(vform, dst, dst, src1, index_reg); + fmls<SimFloat16>(vform, dst, src1, index_reg); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); - fmls<float>(vform, dst, dst, src1, index_reg); + fmls<float>(vform, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); - fmls<double>(vform, dst, dst, src1, index_reg); + fmls<double>(vform, dst, src1, index_reg); } return dst; } @@ 
-5619,142 +4941,62 @@ LogicVRegister Simulator::frint(VectorFormat vform, return dst; } -LogicVRegister Simulator::fcvt(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src) { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, - 0, - src.Uint(vform, i)); - double dst_value = - RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits); - - uint64_t dst_raw_bits = - FPToRawbitsWithSize(dst_data_size_in_bits, dst_value); - - dst.SetUint(vform, i, dst_raw_bits); - } - - return dst; -} LogicVRegister Simulator::fcvts(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, LogicVRegister dst, - const LogicPRegister& pg, const LogicVRegister& src, - FPRounding round, + FPRounding rounding_mode, int fbits) { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, - 0, - src.Uint(vform, i)); - double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * - std::pow(2.0, fbits); - - switch (dst_data_size_in_bits) { - case kHRegSize: - dst.SetInt(vform, i, FPToInt16(result, round)); - break; - case kSRegSize: - dst.SetInt(vform, i, FPToInt32(result, round)); - break; - case kDRegSize: - dst.SetInt(vform, i, FPToInt64(result, round)); - break; - default: - VIXL_UNIMPLEMENTED(); - break; + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = 0; i 
< LaneCountFromFormat(vform); i++) { + SimFloat16 op = + static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits); + dst.SetInt(vform, i, FPToInt16(op, rounding_mode)); + } + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op = src.Float<float>(i) * std::pow(2.0f, fbits); + dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double op = src.Float<double>(i) * std::pow(2.0, fbits); + dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); } } - return dst; } -LogicVRegister Simulator::fcvts(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - FPRounding round, - int fbits) { - dst.ClearForWrite(vform); - return fcvts(vform, - LaneSizeInBitsFromFormat(vform), - LaneSizeInBitsFromFormat(vform), - dst, - GetPTrue(), - src, - round, - fbits); -} LogicVRegister Simulator::fcvtu(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, LogicVRegister dst, - const LogicPRegister& pg, const LogicVRegister& src, - FPRounding round, + FPRounding rounding_mode, int fbits) { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, - 0, - src.Uint(vform, i)); - double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * - std::pow(2.0, fbits); - - switch (dst_data_size_in_bits) { - case kHRegSize: - dst.SetUint(vform, i, FPToUInt16(result, round)); - break; - case kSRegSize: - dst.SetUint(vform, i, FPToUInt32(result, round)); - break; - case kDRegSize: - dst.SetUint(vform, i, FPToUInt64(result, round)); - break; - default: - 
VIXL_UNIMPLEMENTED(); - break; + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SimFloat16 op = + static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits); + dst.SetUint(vform, i, FPToUInt16(op, rounding_mode)); + } + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op = src.Float<float>(i) * std::pow(2.0f, fbits); + dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double op = src.Float<double>(i) * std::pow(2.0, fbits); + dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); } } - return dst; } -LogicVRegister Simulator::fcvtu(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - FPRounding round, - int fbits) { - dst.ClearForWrite(vform); - return fcvtu(vform, - LaneSizeInBitsFromFormat(vform), - LaneSizeInBitsFromFormat(vform), - dst, - GetPTrue(), - src, - round, - fbits); -} LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, @@ -5966,18 +5208,18 @@ LogicVRegister Simulator::frsqrte(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { SimFloat16 input = src.Float<SimFloat16>(i); - dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input)); + dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input)); } } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float<float>(i); - dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input)); + dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float<double>(i); - 
dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input)); + dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); } } return dst; @@ -6112,18 +5354,18 @@ LogicVRegister Simulator::frecpe(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { SimFloat16 input = src.Float<SimFloat16>(i); - dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round)); + dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round)); } } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float<float>(i); - dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round)); + dst.SetFloat(i, FPRecipEstimate<float>(input, round)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float<double>(i); - dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round)); + dst.SetFloat(i, FPRecipEstimate<double>(input, round)); } } return dst; @@ -6184,47 +5426,6 @@ LogicVRegister Simulator::urecpe(VectorFormat vform, return dst; } -LogicPRegister Simulator::pfalse(LogicPRegister dst) { - dst.Clear(); - return dst; -} - -LogicPRegister Simulator::pfirst(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src) { - int first_pg = GetFirstActive(kFormatVnB, pg); - VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB)); - mov(dst, src); - if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true); - return dst; -} - -LogicPRegister Simulator::ptrue(VectorFormat vform, - LogicPRegister dst, - int pattern) { - int count = GetPredicateConstraintLaneCount(vform, pattern); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.SetActive(vform, i, i < count); - } - return dst; -} - -LogicPRegister Simulator::pnext(VectorFormat vform, - LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src) { - int next = 
GetLastActive(vform, src) + 1; - while (next < LaneCountFromFormat(vform)) { - if (pg.IsActive(vform, next)) break; - next++; - } - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.SetActive(vform, i, (i == next)); - } - return dst; -} - template <typename T> LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, @@ -6276,1143 +5477,49 @@ LogicVRegister Simulator::frecpx(VectorFormat vform, return dst; } -LogicVRegister Simulator::ftsmul(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - SimVRegister maybe_neg_src1; - - // The bottom bit of src2 controls the sign of the result. Use it to - // conditionally invert the sign of one `fmul` operand. - shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1); - eor(vform, maybe_neg_src1, maybe_neg_src1, src1); - - // Multiply src1 by the modified neg_src1, which is potentially its negation. - // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1, - // rather than neg_src1, must be the first source argument. - fmul(vform, dst, src1, maybe_neg_src1); - - return dst; -} - -LogicVRegister Simulator::ftssel(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - unsigned lane_bits = LaneSizeInBitsFromFormat(vform); - uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1); - uint64_t one; - - if (lane_bits == kHRegSize) { - one = Float16ToRawbits(Float16(1.0)); - } else if (lane_bits == kSRegSize) { - one = FloatToRawbits(1.0); - } else { - VIXL_ASSERT(lane_bits == kDRegSize); - one = DoubleToRawbits(1.0); - } - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - // Use integer accessors for this operation, as this is a data manipulation - // task requiring no calculation. - uint64_t op = src1.Uint(vform, i); - - // Only the bottom two bits of the src2 register are significant, indicating - // the quadrant. 
Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1 - // determines the sign of the value written to dst. - uint64_t q = src2.Uint(vform, i); - if ((q & 1) == 1) op = one; - if ((q & 2) == 2) op ^= sign_bit; - - dst.SetUint(vform, i, op); - } - - return dst; -} - -template <typename T> -LogicVRegister Simulator::FTMaddHelper(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - uint64_t coeff_pos, - uint64_t coeff_neg) { - SimVRegister zero; - dup_immediate(kFormatVnB, zero, 0); - - SimVRegister cf; - SimVRegister cfn; - dup_immediate(vform, cf, coeff_pos); - dup_immediate(vform, cfn, coeff_neg); - - // The specification requires testing the top bit of the raw value, rather - // than the sign of the floating point number, so use an integer comparison - // here. - SimPRegister is_neg; - SVEIntCompareVectorsHelper(lt, - vform, - is_neg, - GetPTrue(), - src2, - zero, - false, - LeaveFlags); - mov_merging(vform, cf, is_neg, cfn); - - SimVRegister temp; - fabs_<T>(vform, temp, src2); - fmla<T>(vform, cf, cf, src1, temp); - mov(vform, dst, cf); - return dst; -} - - -LogicVRegister Simulator::ftmad(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - unsigned index) { - static const uint64_t ftmad_coeff16[] = {0x3c00, - 0xb155, - 0x2030, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - 0x3c00, - 0xb800, - 0x293a, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - 0x0000}; - - static const uint64_t ftmad_coeff32[] = {0x3f800000, - 0xbe2aaaab, - 0x3c088886, - 0xb95008b9, - 0x36369d6d, - 0x00000000, - 0x00000000, - 0x00000000, - 0x3f800000, - 0xbf000000, - 0x3d2aaaa6, - 0xbab60705, - 0x37cd37cc, - 0x00000000, - 0x00000000, - 0x00000000}; - - static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000, - 0xbfc5555555555543, - 0x3f8111111110f30c, - 0xbf2a01a019b92fc6, - 0x3ec71de351f3d22b, - 0xbe5ae5e2b60f7b91, - 0x3de5d8408868552f, - 0x0000000000000000, - 0x3ff0000000000000, 
- 0xbfe0000000000000, - 0x3fa5555555555536, - 0xbf56c16c16c13a0b, - 0x3efa01a019b1e8d8, - 0xbe927e4f7282f468, - 0x3e21ee96d2641b13, - 0xbda8f76380fbb401}; - VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64)); - VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64)); - VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64)); - - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - FTMaddHelper<SimFloat16>(vform, - dst, - src1, - src2, - ftmad_coeff16[index], - ftmad_coeff16[index + 8]); - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - FTMaddHelper<float>(vform, - dst, - src1, - src2, - ftmad_coeff32[index], - ftmad_coeff32[index + 8]); - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - FTMaddHelper<double>(vform, - dst, - src1, - src2, - ftmad_coeff64[index], - ftmad_coeff64[index + 8]); - } - return dst; -} - -LogicVRegister Simulator::fexpa(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src) { - static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045, - 0x005d, 0x0075, 0x008e, 0x00a8, - 0x00c2, 0x00dc, 0x00f8, 0x0114, - 0x0130, 0x014d, 0x016b, 0x0189, - 0x01a8, 0x01c8, 0x01e8, 0x0209, - 0x022b, 0x024e, 0x0271, 0x0295, - 0x02ba, 0x02e0, 0x0306, 0x032e, - 0x0356, 0x037f, 0x03a9, 0x03d4}; - - static const uint64_t fexpa_coeff32[] = - {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, - 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, - 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, - 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, - 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, - 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, - 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, - 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, - 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 
0x7a83b3, - 0x7d3e0c}; - - static const uint64_t fexpa_coeff64[] = - {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8, - 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0, - 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6, - 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b, - 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7, - 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0, - 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da, - 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225, - 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9, - 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed, - 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50, - 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf, - 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2, - 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c, - 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6, - 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8}; - - unsigned lane_size = LaneSizeInBitsFromFormat(vform); - int index_highbit = 5; - int op_highbit, op_shift; - const uint64_t* fexpa_coeff; - - if (lane_size == kHRegSize) { - index_highbit = 4; - VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1))); - fexpa_coeff = fexpa_coeff16; - op_highbit = 9; - op_shift = 10; - } else if (lane_size == kSRegSize) { - VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1))); - fexpa_coeff = fexpa_coeff32; - op_highbit = 13; - op_shift = 23; - } else { - VIXL_ASSERT(lane_size == kDRegSize); - VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1))); - fexpa_coeff = fexpa_coeff64; - op_highbit = 16; - op_shift = 52; - } - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t op = src.Uint(vform, i); - 
uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)]; - result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift); - dst.SetUint(vform, i, result); - } - return dst; -} - -template <typename T> -LogicVRegister Simulator::fscale(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - T two = T(2.0); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - T s1 = src1.Float<T>(i); - if (!IsNaN(s1)) { - int64_t scale = src2.Int(vform, i); - // TODO: this is a low-performance implementation, but it's simple and - // less likely to be buggy. Consider replacing it with something faster. - - // Scales outside of these bounds become infinity or zero, so there's no - // point iterating further. - scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048); - - // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and - // decrement scale until it's zero. - while (scale-- > 0) { - s1 = FPMul(s1, two); - } - - // If scale is negative, divide by two and increment scale until it's - // zero. Initially, scale is (src2 - 1), so we pre-increment. 
- while (++scale < 0) { - s1 = FPDiv(s1, two); - } - } - dst.SetFloat<T>(i, s1); - } - return dst; -} - -LogicVRegister Simulator::fscale(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fscale<SimFloat16>(vform, dst, src1, src2); - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fscale<float>(vform, dst, src1, src2); - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - fscale<double>(vform, dst, src1, src2); - } - return dst; -} - -LogicVRegister Simulator::scvtf(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src, - FPRounding round, - int fbits) { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1, - 0, - src.Uint(vform, i)); - - switch (dst_data_size_in_bits) { - case kHRegSize: { - SimFloat16 result = FixedToFloat16(value, fbits, round); - dst.SetUint(vform, i, Float16ToRawbits(result)); - break; - } - case kSRegSize: { - float result = FixedToFloat(value, fbits, round); - dst.SetUint(vform, i, FloatToRawbits(result)); - break; - } - case kDRegSize: { - double result = FixedToDouble(value, fbits, round); - dst.SetUint(vform, i, DoubleToRawbits(result)); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } - } - - return dst; -} - LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { - return scvtf(vform, - LaneSizeInBitsFromFormat(vform), - LaneSizeInBitsFromFormat(vform), - dst, - GetPTrue(), - src, - round, - fbits); -} - -LogicVRegister 
Simulator::ucvtf(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src, - FPRounding round, - int fbits) { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, - 0, - src.Uint(vform, i)); - - switch (dst_data_size_in_bits) { - case kHRegSize: { - SimFloat16 result = UFixedToFloat16(value, fbits, round); - dst.SetUint(vform, i, Float16ToRawbits(result)); - break; - } - case kSRegSize: { - float result = UFixedToFloat(value, fbits, round); - dst.SetUint(vform, i, FloatToRawbits(result)); - break; - } - case kDRegSize: { - double result = UFixedToDouble(value, fbits, round); - dst.SetUint(vform, i, DoubleToRawbits(result)); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round); + dst.SetFloat<SimFloat16>(i, result); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); + dst.SetFloat<float>(i, result); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); + dst.SetFloat<double>(i, result); } } - return dst; } + LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { - return ucvtf(vform, - LaneSizeInBitsFromFormat(vform), - LaneSizeInBitsFromFormat(vform), - dst, - GetPTrue(), - src, - round, - fbits); -} - -LogicVRegister Simulator::unpk(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - UnpackType unpack_type, - 
ExtendType extend_type) { - VectorFormat vform_half = VectorFormatHalfWidth(vform); - const int lane_count = LaneCountFromFormat(vform); - const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count; - - switch (extend_type) { - case kSignedExtend: { - int64_t result[kZRegMaxSizeInBytes]; - for (int i = 0; i < lane_count; ++i) { - result[i] = src.Int(vform_half, i + src_start_lane); - } - for (int i = 0; i < lane_count; ++i) { - dst.SetInt(vform, i, result[i]); - } - break; - } - case kUnsignedExtend: { - uint64_t result[kZRegMaxSizeInBytes]; - for (int i = 0; i < lane_count; ++i) { - result[i] = src.Uint(vform_half, i + src_start_lane); - } - for (int i = 0; i < lane_count; ++i) { - dst.SetUint(vform, i, result[i]); - } - break; - } - default: - VIXL_UNREACHABLE(); - } - return dst; -} - -LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond, - VectorFormat vform, - LogicPRegister dst, - const LogicPRegister& mask, - const LogicVRegister& src1, - const LogicVRegister& src2, - bool is_wide_elements, - FlagsUpdate flags) { - for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { - bool result = false; - if (mask.IsActive(vform, lane)) { - int64_t op1 = 0xbadbeef; - int64_t op2 = 0xbadbeef; - int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize; - switch (cond) { - case eq: - case ge: - case gt: - case lt: - case le: - case ne: - op1 = src1.Int(vform, lane); - op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane) - : src2.Int(vform, lane); - break; - case hi: - case hs: - case ls: - case lo: - op1 = src1.Uint(vform, lane); - op2 = is_wide_elements ? 
src2.Uint(kFormatVnD, d_lane) - : src2.Uint(vform, lane); - break; - default: - VIXL_UNREACHABLE(); - } - - switch (cond) { - case eq: - result = (op1 == op2); - break; - case ne: - result = (op1 != op2); - break; - case ge: - result = (op1 >= op2); - break; - case gt: - result = (op1 > op2); - break; - case le: - result = (op1 <= op2); - break; - case lt: - result = (op1 < op2); - break; - case hs: - result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2)); - break; - case hi: - result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2)); - break; - case ls: - result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2)); - break; - case lo: - result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2)); - break; - default: - VIXL_UNREACHABLE(); - } - } - dst.SetActive(vform, lane, result); - } - - if (flags == SetFlags) PredTest(vform, mask, dst); - - return dst; -} - -LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op, - VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - bool is_wide_elements) { - unsigned lane_size = LaneSizeInBitsFromFormat(vform); - VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform; - - for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { - int shift_src_lane = lane; - if (is_wide_elements) { - // If the shift amount comes from wide elements, select the D-sized lane - // which occupies the corresponding lanes of the value to be shifted. - shift_src_lane = (lane * lane_size) / kDRegSize; - } - uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane); - - // Saturate shift_amount to the size of the lane that will be shifted. 
- if (shift_amount > lane_size) shift_amount = lane_size; - - uint64_t value = src1.Uint(vform, lane); - int64_t result = ShiftOperand(lane_size, - value, - shift_op, - static_cast<unsigned>(shift_amount)); - dst.SetUint(vform, lane, result); - } - - return dst; -} - -LogicVRegister Simulator::asrd(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - int shift) { - VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <= - LaneSizeInBitsFromFormat(vform))); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - int64_t value = src1.Int(vform, i); - if (shift <= 63) { - if (value < 0) { - // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely - // cast to int64_t, and cannot cause signed overflow in the result. - value = value + GetUintMask(shift); - } - value = ShiftOperand(kDRegSize, value, ASR, shift); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round); + dst.SetFloat<SimFloat16>(i, result); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); + dst.SetFloat<float>(i, result); } else { - value = 0; + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); + dst.SetFloat<double>(i, result); } - dst.SetInt(vform, i, value); } return dst; } -LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper( - LogicalOp logical_op, - VectorFormat vform, - LogicVRegister zd, - const LogicVRegister& zn, - const LogicVRegister& zm) { - VIXL_ASSERT(IsSVEFormat(vform)); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t op1 = zn.Uint(vform, i); - uint64_t op2 = zm.Uint(vform, i); - uint64_t result; - switch (logical_op) { - case AND: - result = op1 & op2; - break; - case BIC: - result = op1 & ~op2; - break; - case EOR: - result = op1 ^ op2; - break; - case ORR: - result = op1 | op2; - break; 
- default: - result = 0; - VIXL_UNIMPLEMENTED(); - } - zd.SetUint(vform, i, result); - } - - return zd; -} - -LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, - LogicPRegister pd, - const LogicPRegister& pn, - const LogicPRegister& pm) { - for (int i = 0; i < pn.GetChunkCount(); i++) { - LogicPRegister::ChunkType op1 = pn.GetChunk(i); - LogicPRegister::ChunkType op2 = pm.GetChunk(i); - LogicPRegister::ChunkType result; - switch (op) { - case ANDS_p_p_pp_z: - case AND_p_p_pp_z: - result = op1 & op2; - break; - case BICS_p_p_pp_z: - case BIC_p_p_pp_z: - result = op1 & ~op2; - break; - case EORS_p_p_pp_z: - case EOR_p_p_pp_z: - result = op1 ^ op2; - break; - case NANDS_p_p_pp_z: - case NAND_p_p_pp_z: - result = ~(op1 & op2); - break; - case NORS_p_p_pp_z: - case NOR_p_p_pp_z: - result = ~(op1 | op2); - break; - case ORNS_p_p_pp_z: - case ORN_p_p_pp_z: - result = op1 | ~op2; - break; - case ORRS_p_p_pp_z: - case ORR_p_p_pp_z: - result = op1 | op2; - break; - default: - result = 0; - VIXL_UNIMPLEMENTED(); - } - pd.SetChunk(i, result); - } - return pd; -} - -LogicVRegister Simulator::SVEBitwiseImmHelper( - SVEBitwiseLogicalWithImm_UnpredicatedOp op, - VectorFormat vform, - LogicVRegister zd, - uint64_t imm) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t op1 = zd.Uint(vform, i); - uint64_t result; - switch (op) { - case AND_z_zi: - result = op1 & imm; - break; - case EOR_z_zi: - result = op1 ^ imm; - break; - case ORR_z_zi: - result = op1 | imm; - break; - default: - result = 0; - VIXL_UNIMPLEMENTED(); - } - zd.SetUint(vform, i, result); - } - - return zd; -} - -void Simulator::SVEStructuredStoreHelper(VectorFormat vform, - const LogicPRegister& pg, - unsigned zt_code, - const LogicSVEAddressVector& addr) { - VIXL_ASSERT(zt_code < kNumberOfZRegisters); - - int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); - int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); - int msize_in_bytes = addr.GetMsizeInBytes(); 
- int reg_count = addr.GetRegCount(); - - VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); - VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); - - unsigned zt_codes[4] = {zt_code, - (zt_code + 1) % kNumberOfZRegisters, - (zt_code + 2) % kNumberOfZRegisters, - (zt_code + 3) % kNumberOfZRegisters}; - - LogicVRegister zt[4] = { - ReadVRegister(zt_codes[0]), - ReadVRegister(zt_codes[1]), - ReadVRegister(zt_codes[2]), - ReadVRegister(zt_codes[3]), - }; - - // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes - // are ignored, so read the source register using the VectorFormat that - // corresponds with the storage format, and multiply the index accordingly. - VectorFormat unpack_vform = - SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); - int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2; - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (!pg.IsActive(vform, i)) continue; - - for (int r = 0; r < reg_count; r++) { - uint64_t element_address = addr.GetElementAddress(i, r); - zt[r].WriteUintToMem(unpack_vform, i << unpack_shift, element_address); - } - } - - if (ShouldTraceWrites()) { - PrintRegisterFormat format = GetPrintRegisterFormat(vform); - if (esize_in_bytes_log2 == msize_in_bytes_log2) { - // Use an FP format where it's likely that we're accessing FP data. - format = GetPrintRegisterFormatTryFP(format); - } - // Stores don't represent a change to the source register's value, so only - // print the relevant part of the value. 
- format = GetPrintRegPartial(format); - - PrintZStructAccess(zt_code, - reg_count, - pg, - format, - msize_in_bytes, - "->", - addr); - } -} - -void Simulator::SVEStructuredLoadHelper(VectorFormat vform, - const LogicPRegister& pg, - unsigned zt_code, - const LogicSVEAddressVector& addr, - bool is_signed) { - int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); - int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); - int msize_in_bytes = addr.GetMsizeInBytes(); - int reg_count = addr.GetRegCount(); - - VIXL_ASSERT(zt_code < kNumberOfZRegisters); - VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); - VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); - - unsigned zt_codes[4] = {zt_code, - (zt_code + 1) % kNumberOfZRegisters, - (zt_code + 2) % kNumberOfZRegisters, - (zt_code + 3) % kNumberOfZRegisters}; - LogicVRegister zt[4] = { - ReadVRegister(zt_codes[0]), - ReadVRegister(zt_codes[1]), - ReadVRegister(zt_codes[2]), - ReadVRegister(zt_codes[3]), - }; - - VectorFormat unpack_vform = - SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - for (int r = 0; r < reg_count; r++) { - uint64_t element_address = addr.GetElementAddress(i, r); - - if (!pg.IsActive(vform, i)) { - zt[r].SetUint(vform, i, 0); - continue; - } - - if (is_signed) { - zt[r].ReadIntFromMem(vform, - LaneSizeInBitsFromFormat(unpack_vform), - i, - element_address); - - } else { - zt[r].ReadUintFromMem(vform, - LaneSizeInBitsFromFormat(unpack_vform), - i, - element_address); - } - } - } - - if (ShouldTraceVRegs()) { - PrintRegisterFormat format = GetPrintRegisterFormat(vform); - if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) { - // Use an FP format where it's likely that we're accessing FP data. 
- format = GetPrintRegisterFormatTryFP(format); - } - PrintZStructAccess(zt_code, - reg_count, - pg, - format, - msize_in_bytes, - "<-", - addr); - } -} - -LogicPRegister Simulator::brka(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn) { - bool break_ = false; - for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { - if (pg.IsActive(kFormatVnB, i)) { - pd.SetActive(kFormatVnB, i, !break_); - break_ |= pn.IsActive(kFormatVnB, i); - } - } - - return pd; -} - -LogicPRegister Simulator::brkb(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn) { - bool break_ = false; - for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { - if (pg.IsActive(kFormatVnB, i)) { - break_ |= pn.IsActive(kFormatVnB, i); - pd.SetActive(kFormatVnB, i, !break_); - } - } - - return pd; -} - -LogicPRegister Simulator::brkn(LogicPRegister pdm, - const LogicPRegister& pg, - const LogicPRegister& pn) { - if (!IsLastActive(kFormatVnB, pg, pn)) { - pfalse(pdm); - } - return pdm; -} - -LogicPRegister Simulator::brkpa(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn, - const LogicPRegister& pm) { - bool last_active = IsLastActive(kFormatVnB, pg, pn); - - for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { - bool active = false; - if (pg.IsActive(kFormatVnB, i)) { - active = last_active; - last_active = last_active && !pm.IsActive(kFormatVnB, i); - } - pd.SetActive(kFormatVnB, i, active); - } - - return pd; -} - -LogicPRegister Simulator::brkpb(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn, - const LogicPRegister& pm) { - bool last_active = IsLastActive(kFormatVnB, pg, pn); - - for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { - bool active = false; - if (pg.IsActive(kFormatVnB, i)) { - last_active = last_active && !pm.IsActive(kFormatVnB, i); - active = last_active; - } - pd.SetActive(kFormatVnB, i, active); - } - - return pd; -} - -void 
Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform, - const LogicPRegister& pg, - unsigned zt_code, - const LogicSVEAddressVector& addr, - SVEFaultTolerantLoadType type, - bool is_signed) { - int esize_in_bytes = LaneSizeInBytesFromFormat(vform); - int msize_in_bits = addr.GetMsizeInBits(); - int msize_in_bytes = addr.GetMsizeInBytes(); - - VIXL_ASSERT(zt_code < kNumberOfZRegisters); - VIXL_ASSERT(esize_in_bytes >= msize_in_bytes); - VIXL_ASSERT(addr.GetRegCount() == 1); - - LogicVRegister zt = ReadVRegister(zt_code); - LogicPRegister ffr = ReadFFR(); - - // Non-faulting loads are allowed to fail arbitrarily. To stress user - // code, fail a random element in roughly one in eight full-vector loads. - uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_)); - int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8); - - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - uint64_t value = 0; - - if (pg.IsActive(vform, i)) { - uint64_t element_address = addr.GetElementAddress(i, 0); - - if (type == kSVEFirstFaultLoad) { - // First-faulting loads always load the first active element, regardless - // of FFR. The result will be discarded if its FFR lane is inactive, but - // it could still generate a fault. - value = Memory::Read(msize_in_bytes, element_address); - // All subsequent elements have non-fault semantics. - type = kSVENonFaultLoad; - - } else if (ffr.IsActive(vform, i)) { - // Simulation of fault-tolerant loads relies on system calls, and is - // likely to be relatively slow, so we only actually perform the load if - // its FFR lane is active. - - bool can_read = (i < fake_fault_at_lane) && - CanReadMemory(element_address, msize_in_bytes); - if (can_read) { - value = Memory::Read(msize_in_bytes, element_address); - } else { - // Propagate the fault to the end of FFR. 
- for (int j = i; j < LaneCountFromFormat(vform); j++) { - ffr.SetActive(vform, j, false); - } - } - } - } - - // The architecture permits a few possible results for inactive FFR lanes - // (including those caused by a fault in this instruction). We choose to - // leave the register value unchanged (like merging predication) because - // no other input to this instruction can have the same behaviour. - // - // Note that this behaviour takes precedence over pg's zeroing predication. - - if (ffr.IsActive(vform, i)) { - int msb = msize_in_bits - 1; - if (is_signed) { - zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value)); - } else { - zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value)); - } - } - } - - if (ShouldTraceVRegs()) { - PrintRegisterFormat format = GetPrintRegisterFormat(vform); - if ((esize_in_bytes == msize_in_bytes) && !is_signed) { - // Use an FP format where it's likely that we're accessing FP data. - format = GetPrintRegisterFormatTryFP(format); - } - // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess - // expects a single mask, so combine the two predicates. - SimPRegister mask; - SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr); - PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr); - } -} - -void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, - VectorFormat vform, - SVEOffsetModifier mod) { - bool is_signed = instr->ExtractBit(14) == 0; - bool is_ff = instr->ExtractBit(13) == 1; - // Note that these instructions don't use the Dtype encoding. 
- int msize_in_bytes_log2 = instr->ExtractBits(24, 23); - int scale = instr->ExtractBit(21) * msize_in_bytes_log2; - uint64_t base = ReadXRegister(instr->GetRn()); - LogicSVEAddressVector addr(base, - &ReadVRegister(instr->GetRm()), - vform, - mod, - scale); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - if (is_ff) { - SVEFaultTolerantLoadHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - kSVEFirstFaultLoad, - is_signed); - } else { - SVEStructuredLoadHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - is_signed); - } -} - -int Simulator::GetFirstActive(VectorFormat vform, - const LogicPRegister& pg) const { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (pg.IsActive(vform, i)) return i; - } - return -1; -} - -int Simulator::GetLastActive(VectorFormat vform, - const LogicPRegister& pg) const { - for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { - if (pg.IsActive(vform, i)) return i; - } - return -1; -} - -int Simulator::CountActiveLanes(VectorFormat vform, - const LogicPRegister& pg) const { - int count = 0; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - count += pg.IsActive(vform, i) ? 1 : 0; - } - return count; -} - -int Simulator::CountActiveAndTrueLanes(VectorFormat vform, - const LogicPRegister& pg, - const LogicPRegister& pn) const { - int count = 0; - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0; - } - return count; -} - -int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform, - int pattern) const { - VIXL_ASSERT(IsSVEFormat(vform)); - int all = LaneCountFromFormat(vform); - VIXL_ASSERT(all > 0); - - switch (pattern) { - case SVE_VL1: - case SVE_VL2: - case SVE_VL3: - case SVE_VL4: - case SVE_VL5: - case SVE_VL6: - case SVE_VL7: - case SVE_VL8: - // VL1-VL8 are encoded directly. - VIXL_STATIC_ASSERT(SVE_VL1 == 1); - VIXL_STATIC_ASSERT(SVE_VL8 == 8); - return (pattern <= all) ? 
pattern : 0; - case SVE_VL16: - case SVE_VL32: - case SVE_VL64: - case SVE_VL128: - case SVE_VL256: { - // VL16-VL256 are encoded as log2(N) + c. - int min = 16 << (pattern - SVE_VL16); - return (min <= all) ? min : 0; - } - // Special cases. - case SVE_POW2: - return 1 << HighestSetBitPosition(all); - case SVE_MUL4: - return all - (all % 4); - case SVE_MUL3: - return all - (all % 3); - case SVE_ALL: - return all; - } - // Unnamed cases archicturally return 0. - return 0; -} - -uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const { - if (IsContiguous()) { - return base_ + (lane * GetRegCount()) * GetMsizeInBytes(); - } - - VIXL_ASSERT(IsScatterGather()); - VIXL_ASSERT(vector_ != NULL); - - // For scatter-gather accesses, we need to extract the offset from vector_, - // and apply modifiers. - - uint64_t offset = 0; - switch (vector_form_) { - case kFormatVnS: - offset = vector_->GetLane<uint32_t>(lane); - break; - case kFormatVnD: - offset = vector_->GetLane<uint64_t>(lane); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - switch (vector_mod_) { - case SVE_MUL_VL: - VIXL_UNIMPLEMENTED(); - break; - case SVE_LSL: - // We apply the shift below. There's nothing to do here. 
- break; - case NO_SVE_OFFSET_MODIFIER: - VIXL_ASSERT(vector_shift_ == 0); - break; - case SVE_UXTW: - offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset); - break; - case SVE_SXTW: - offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset); - break; - } - - return base_ + (offset << vector_shift_); -} - } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc index 56c6eaf6..85954fc9 100644 --- a/src/aarch64/macro-assembler-aarch64.cc +++ b/src/aarch64/macro-assembler-aarch64.cc @@ -65,7 +65,7 @@ LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { void LiteralPool::Reset() { - std::vector<RawLiteral*>::iterator it, end; + std::vector<RawLiteral *>::iterator it, end; for (it = entries_.begin(), end = entries_.end(); it != end; ++it) { RawLiteral* literal = *it; if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) { @@ -145,7 +145,7 @@ void LiteralPool::Emit(EmitOption option) { } // Now populate the literal pool. 
- std::vector<RawLiteral*>::iterator it, end; + std::vector<RawLiteral *>::iterator it, end; for (it = entries_.begin(), end = entries_.end(); it != end; ++it) { VIXL_ASSERT((*it)->IsUsed()); masm_->place(*it); @@ -321,13 +321,11 @@ MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic) generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE), sp_(sp), tmp_list_(ip0, ip1), - v_tmp_list_(d31), - p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)), + fptmp_list_(d31), current_scratch_scope_(NULL), literal_pool_(this), veneer_pool_(this), - recommended_checkpoint_(Pool::kNoCheckpointRequired), - fp_nan_propagation_(NoFPMacroNaNPropagationSelected) { + recommended_checkpoint_(Pool::kNoCheckpointRequired) { checkpoint_ = GetNextCheckPoint(); #ifndef VIXL_DEBUG USE(allow_macro_instructions_); @@ -344,13 +342,11 @@ MacroAssembler::MacroAssembler(size_t capacity, generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE), sp_(sp), tmp_list_(ip0, ip1), - v_tmp_list_(d31), - p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)), + fptmp_list_(d31), current_scratch_scope_(NULL), literal_pool_(this), veneer_pool_(this), - recommended_checkpoint_(Pool::kNoCheckpointRequired), - fp_nan_propagation_(NoFPMacroNaNPropagationSelected) { + recommended_checkpoint_(Pool::kNoCheckpointRequired) { checkpoint_ = GetNextCheckPoint(); } @@ -365,13 +361,11 @@ MacroAssembler::MacroAssembler(byte* buffer, generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE), sp_(sp), tmp_list_(ip0, ip1), - v_tmp_list_(d31), - p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)), + fptmp_list_(d31), current_scratch_scope_(NULL), literal_pool_(this), veneer_pool_(this), - recommended_checkpoint_(Pool::kNoCheckpointRequired), - fp_nan_propagation_(NoFPMacroNaNPropagationSelected) { + recommended_checkpoint_(Pool::kNoCheckpointRequired) { checkpoint_ = GetNextCheckPoint(); } @@ -825,12 +819,6 @@ void MacroAssembler::LogicalMacro(const Register& rd, // * 1 instruction to move 
to sp MacroEmissionCheckScope guard(this); UseScratchRegisterScope temps(this); - // Use `rd` as a temp, if we can. - temps.Include(rd); - // We read `rn` after evaluating `operand`. - temps.Exclude(rn); - // It doesn't matter if `operand` is in `temps` (e.g. because it alises `rd`) - // because we don't need it after it is evaluated. if (operand.IsImmediate()) { uint64_t immediate = operand.GetImmediate(); @@ -898,7 +886,6 @@ void MacroAssembler::LogicalMacro(const Register& rd, } else { // Immediate can't be encoded: synthesize using move immediate. Register temp = temps.AcquireSameSizeAs(rn); - VIXL_ASSERT(!temp.Aliases(rn)); // If the left-hand input is the stack pointer, we can't pre-shift the // immediate, as the encoding won't allow the subsequent post shift. @@ -923,8 +910,8 @@ void MacroAssembler::LogicalMacro(const Register& rd, operand.GetRegister().Is64Bits() || ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX))); + temps.Exclude(operand.GetRegister()); Register temp = temps.AcquireSameSizeAs(rn); - VIXL_ASSERT(!temp.Aliases(rn)); EmitExtendShift(temp, operand.GetRegister(), operand.GetExtend(), @@ -1152,13 +1139,17 @@ void MacroAssembler::Mvn(const Register& rd, const Operand& operand) { // Call the macro assembler for generic immediates. Mvn(rd, operand.GetImmediate()); } else if (operand.IsExtendedRegister()) { + UseScratchRegisterScope temps(this); + temps.Exclude(operand.GetRegister()); + // Emit two instructions for the extend case. This differs from Mov, as // the extend and invert can't be achieved in one instruction. - EmitExtendShift(rd, + Register temp = temps.AcquireSameSizeAs(rd); + EmitExtendShift(temp, operand.GetRegister(), operand.GetExtend(), operand.GetShiftAmount()); - mvn(rd, rd); + mvn(rd, Operand(temp)); } else { // Otherwise, register and shifted register cases can be handled by the // assembler directly, using orn. 
@@ -1427,15 +1418,12 @@ void MacroAssembler::Add(const Register& rd, const Operand& operand, FlagsUpdate S) { VIXL_ASSERT(allow_macro_instructions_); - if (operand.IsImmediate()) { - int64_t imm = operand.GetImmediate(); - if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) && - IsImmAddSub(-imm)) { - AddSubMacro(rd, rn, -imm, S, SUB); - return; - } + if (operand.IsImmediate() && (operand.GetImmediate() < 0) && + IsImmAddSub(-operand.GetImmediate())) { + AddSubMacro(rd, rn, -operand.GetImmediate(), S, SUB); + } else { + AddSubMacro(rd, rn, operand, S, ADD); } - AddSubMacro(rd, rn, operand, S, ADD); } @@ -1451,15 +1439,12 @@ void MacroAssembler::Sub(const Register& rd, const Operand& operand, FlagsUpdate S) { VIXL_ASSERT(allow_macro_instructions_); - if (operand.IsImmediate()) { - int64_t imm = operand.GetImmediate(); - if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) && - IsImmAddSub(-imm)) { - AddSubMacro(rd, rn, -imm, S, ADD); - return; - } + if (operand.IsImmediate() && (operand.GetImmediate() < 0) && + IsImmAddSub(-operand.GetImmediate())) { + AddSubMacro(rd, rn, -operand.GetImmediate(), S, ADD); + } else { + AddSubMacro(rd, rn, operand, S, SUB); } - AddSubMacro(rd, rn, operand, S, SUB); } @@ -1772,12 +1757,6 @@ void MacroAssembler::AddSubMacro(const Register& rd, (rn.IsZero() && !operand.IsShiftedRegister()) || (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) { UseScratchRegisterScope temps(this); - // Use `rd` as a temp, if we can. - temps.Include(rd); - // We read `rn` after evaluating `operand`. - temps.Exclude(rn); - // It doesn't matter if `operand` is in `temps` (e.g. because it alises - // `rd`) because we don't need it after it is evaluated. 
Register temp = temps.AcquireSameSizeAs(rn); if (operand.IsImmediate()) { PreShiftImmMode mode = kAnyShift; @@ -1863,12 +1842,6 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd, // * 1 instruction for add/sub MacroEmissionCheckScope guard(this); UseScratchRegisterScope temps(this); - // Use `rd` as a temp, if we can. - temps.Include(rd); - // We read `rn` after evaluating `operand`. - temps.Exclude(rn); - // It doesn't matter if `operand` is in `temps` (e.g. because it alises `rd`) - // because we don't need it after it is evaluated. if (operand.IsImmediate() || (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) { @@ -1883,6 +1856,7 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd, VIXL_ASSERT( IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2, operand.GetShiftAmount())); + temps.Exclude(operand.GetRegister()); Register temp = temps.AcquireSameSizeAs(rn); EmitShift(temp, operand.GetRegister(), @@ -1898,6 +1872,7 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd, VIXL_ASSERT( operand.GetRegister().Is64Bits() || ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX))); + temps.Exclude(operand.GetRegister()); Register temp = temps.AcquireSameSizeAs(rn); EmitExtendShift(temp, operand.GetRegister(), @@ -2422,8 +2397,7 @@ void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op, // We do not handle pre-indexing or post-indexing. VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex())); VIXL_ASSERT(!registers.Overlaps(tmp_list_)); - VIXL_ASSERT(!registers.Overlaps(v_tmp_list_)); - VIXL_ASSERT(!registers.Overlaps(p_tmp_list_)); + VIXL_ASSERT(!registers.Overlaps(fptmp_list_)); VIXL_ASSERT(!registers.IncludesAliasOf(sp)); UseScratchRegisterScope temps(this); @@ -2507,7 +2481,7 @@ void MacroAssembler::BumpSystemStackPointer(const Operand& space) { } -// TODO(all): Fix printf for NEON and SVE registers. +// TODO(all): Fix printf for NEON registers. 
// This is the main Printf implementation. All callee-saved registers are // preserved, but NZCV and the caller-saved registers may be clobbered. @@ -2790,6 +2764,32 @@ void MacroAssembler::Log(TraceParameters parameters) { } +void MacroAssembler::EnableInstrumentation() { + VIXL_ASSERT(!isprint(InstrumentStateEnable)); + ExactAssemblyScope scope(this, kInstructionSize); + movn(xzr, InstrumentStateEnable); +} + + +void MacroAssembler::DisableInstrumentation() { + VIXL_ASSERT(!isprint(InstrumentStateDisable)); + ExactAssemblyScope scope(this, kInstructionSize); + movn(xzr, InstrumentStateDisable); +} + + +void MacroAssembler::AnnotateInstrumentation(const char* marker_name) { + VIXL_ASSERT(strlen(marker_name) == 2); + + // We allow only printable characters in the marker names. Unprintable + // characters are reserved for controlling features of the instrumentation. + VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1])); + + ExactAssemblyScope scope(this, kInstructionSize); + movn(xzr, (marker_name[1] << 8) | marker_name[0]); +} + + void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) { ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode); } @@ -2870,13 +2870,10 @@ void UseScratchRegisterScope::Open(MacroAssembler* masm) { CPURegList* available = masm->GetScratchRegisterList(); CPURegList* available_v = masm->GetScratchVRegisterList(); - CPURegList* available_p = masm->GetScratchPRegisterList(); old_available_ = available->GetList(); old_available_v_ = available_v->GetList(); - old_available_p_ = available_p->GetList(); VIXL_ASSERT(available->GetType() == CPURegister::kRegister); VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister); - VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister); parent_ = masm->GetCurrentScratchRegisterScope(); masm->SetCurrentScratchRegisterScope(this); @@ -2894,7 +2891,6 @@ void UseScratchRegisterScope::Close() { masm_->GetScratchRegisterList()->SetList(old_available_); 
masm_->GetScratchVRegisterList()->SetList(old_available_v_); - masm_->GetScratchPRegisterList()->SetList(old_available_p_); masm_ = NULL; } @@ -2903,46 +2899,44 @@ void UseScratchRegisterScope::Close() { bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const { return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) || - masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) || - masm_->GetScratchPRegisterList()->IncludesAliasOf(reg); + masm_->GetScratchVRegisterList()->IncludesAliasOf(reg); } + Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) { - int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode(); + int code = AcquireNextAvailable(masm_->GetScratchRegisterList()).GetCode(); return Register(code, size_in_bits); } VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) { - int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode(); + int code = AcquireNextAvailable(masm_->GetScratchVRegisterList()).GetCode(); return VRegister(code, size_in_bits); } void UseScratchRegisterScope::Release(const CPURegister& reg) { VIXL_ASSERT(masm_ != NULL); - - // Release(NoReg) has no effect. - if (reg.IsNone()) return; - - ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode()); + if (reg.IsRegister()) { + ReleaseByCode(masm_->GetScratchRegisterList(), reg.GetCode()); + } else if (reg.IsVRegister()) { + ReleaseByCode(masm_->GetScratchVRegisterList(), reg.GetCode()); + } else { + VIXL_ASSERT(reg.IsNone()); + } } void UseScratchRegisterScope::Include(const CPURegList& list) { VIXL_ASSERT(masm_ != NULL); - - // Including an empty list has no effect. - if (list.IsEmpty()) return; - VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister); - - RegList reg_list = list.GetList(); if (list.GetType() == CPURegister::kRegister) { // Make sure that neither sp nor xzr are included the list. 
- reg_list &= ~(xzr.GetBit() | sp.GetBit()); + IncludeByRegList(masm_->GetScratchRegisterList(), + list.GetList() & ~(xzr.GetBit() | sp.GetBit())); + } else { + VIXL_ASSERT(list.GetType() == CPURegister::kVRegister); + IncludeByRegList(masm_->GetScratchVRegisterList(), list.GetList()); } - - IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list); } @@ -2970,43 +2964,13 @@ void UseScratchRegisterScope::Include(const VRegister& reg1, } -void UseScratchRegisterScope::Include(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3, - const CPURegister& reg4) { - RegList include = 0; - RegList include_v = 0; - RegList include_p = 0; - - const CPURegister regs[] = {reg1, reg2, reg3, reg4}; - - for (size_t i = 0; i < ArrayLength(regs); i++) { - RegList bit = regs[i].GetBit(); - switch (regs[i].GetBank()) { - case CPURegister::kNoRegisterBank: - // Include(NoReg) has no effect. - VIXL_ASSERT(regs[i].IsNone()); - break; - case CPURegister::kRRegisterBank: - include |= bit; - break; - case CPURegister::kVRegisterBank: - include_v |= bit; - break; - case CPURegister::kPRegisterBank: - include_p |= bit; - break; - } - } - - IncludeByRegList(masm_->GetScratchRegisterList(), include); - IncludeByRegList(masm_->GetScratchVRegisterList(), include_v); - IncludeByRegList(masm_->GetScratchPRegisterList(), include_p); -} - - void UseScratchRegisterScope::Exclude(const CPURegList& list) { - ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList()); + if (list.GetType() == CPURegister::kRegister) { + ExcludeByRegList(masm_->GetScratchRegisterList(), list.GetList()); + } else { + VIXL_ASSERT(list.GetType() == CPURegister::kVRegister); + ExcludeByRegList(masm_->GetScratchVRegisterList(), list.GetList()); + } } @@ -3024,9 +2988,9 @@ void UseScratchRegisterScope::Exclude(const VRegister& reg1, const VRegister& reg2, const VRegister& reg3, const VRegister& reg4) { - RegList exclude_v = + RegList excludefp = reg1.GetBit() | reg2.GetBit() | 
reg3.GetBit() | reg4.GetBit(); - ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v); + ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp); } @@ -3035,33 +2999,22 @@ void UseScratchRegisterScope::Exclude(const CPURegister& reg1, const CPURegister& reg3, const CPURegister& reg4) { RegList exclude = 0; - RegList exclude_v = 0; - RegList exclude_p = 0; + RegList excludefp = 0; const CPURegister regs[] = {reg1, reg2, reg3, reg4}; for (size_t i = 0; i < ArrayLength(regs); i++) { - RegList bit = regs[i].GetBit(); - switch (regs[i].GetBank()) { - case CPURegister::kNoRegisterBank: - // Exclude(NoReg) has no effect. - VIXL_ASSERT(regs[i].IsNone()); - break; - case CPURegister::kRRegisterBank: - exclude |= bit; - break; - case CPURegister::kVRegisterBank: - exclude_v |= bit; - break; - case CPURegister::kPRegisterBank: - exclude_p |= bit; - break; + if (regs[i].IsRegister()) { + exclude |= regs[i].GetBit(); + } else if (regs[i].IsVRegister()) { + excludefp |= regs[i].GetBit(); + } else { + VIXL_ASSERT(regs[i].IsNone()); } } ExcludeByRegList(masm_->GetScratchRegisterList(), exclude); - ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v); - ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p); + ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp); } @@ -3070,15 +3023,13 @@ void UseScratchRegisterScope::ExcludeAll() { masm_->GetScratchRegisterList()->GetList()); ExcludeByRegList(masm_->GetScratchVRegisterList(), masm_->GetScratchVRegisterList()->GetList()); - ExcludeByRegList(masm_->GetScratchPRegisterList(), - masm_->GetScratchPRegisterList()->GetList()); } -CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available, - RegList mask) { - VIXL_CHECK((available->GetList() & mask) != 0); - CPURegister result = available->PopLowestIndex(mask); +CPURegister UseScratchRegisterScope::AcquireNextAvailable( + CPURegList* available) { + VIXL_CHECK(!available->IsEmpty()); + CPURegister result = available->PopLowestIndex(); 
VIXL_ASSERT(!AreAliased(result, xzr, sp)); return result; } @@ -3106,22 +3057,5 @@ void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available, available->SetList(available->GetList() & ~exclude); } -CPURegList* UseScratchRegisterScope::GetAvailableListFor( - CPURegister::RegisterBank bank) { - switch (bank) { - case CPURegister::kNoRegisterBank: - return NULL; - case CPURegister::kRRegisterBank: - return masm_->GetScratchRegisterList(); - case CPURegister::kVRegisterBank: - return masm_->GetScratchVRegisterList(); - case CPURegister::kPRegisterBank: - return masm_->GetScratchPRegisterList(); - return NULL; - } - VIXL_UNREACHABLE(); - return NULL; -} - } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h index 8becddbb..31db8dab 100644 --- a/src/aarch64/macro-assembler-aarch64.h +++ b/src/aarch64/macro-assembler-aarch64.h @@ -35,6 +35,7 @@ #include "../macro-assembler-interface.h" #include "assembler-aarch64.h" +#include "instrument-aarch64.h" // Required for runtime call support. // TODO: Break this dependency. We should be able to separate out the necessary // parts so that we don't need to include the whole simulator header. @@ -60,7 +61,7 @@ #define LSPAIR_MACRO_LIST(V) \ V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \ V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \ - V(Ldpsw, Register&, rt, rt2, LDPSW_x) + V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x) namespace vixl { namespace aarch64 { @@ -527,57 +528,6 @@ class MacroEmissionCheckScope : public EmissionCheckScope { }; -// This scope simplifies the handling of the SVE `movprfx` instruction. -// -// If dst.Aliases(src): -// - Start an ExactAssemblyScope(masm, kInstructionSize). -// Otherwise: -// - Start an ExactAssemblyScope(masm, 2 * kInstructionSize). -// - Generate a suitable `movprfx` instruction. 
-// -// In both cases, the ExactAssemblyScope is left with enough remaining space for -// exactly one destructive instruction. -class MovprfxHelperScope : public ExactAssemblyScope { - public: - inline MovprfxHelperScope(MacroAssembler* masm, - const ZRegister& dst, - const ZRegister& src); - - inline MovprfxHelperScope(MacroAssembler* masm, - const ZRegister& dst, - const PRegister& pg, - const ZRegister& src); - - // TODO: Implement constructors that examine _all_ sources. If `dst` aliases - // any other source register, we can't use `movprfx`. This isn't obviously - // useful, but the MacroAssembler should not generate invalid code for it. - // Valid behaviour can be implemented using `mov`. - // - // The best way to handle this in an instruction-agnostic way is probably to - // use variadic templates. - - private: - inline bool ShouldGenerateMovprfx(const ZRegister& dst, - const ZRegister& src) { - VIXL_ASSERT(AreSameLaneSize(dst, src)); - return !dst.Aliases(src); - } - - inline bool ShouldGenerateMovprfx(const ZRegister& dst, - const PRegister& pg, - const ZRegister& src) { - VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); - // We need to emit movprfx in two cases: - // 1. To give a predicated merging unary instruction zeroing predication. - // 2. To make destructive instructions constructive. - // - // There are no predicated zeroing instructions that can take movprfx, so we - // will never generate an unnecessary movprfx with this logic. - return pg.IsZeroing() || ShouldGenerateMovprfx(dst, src); - } -}; - - enum BranchType { // Copies of architectural conditions. // The associated conditions can be used in place of those, the code will @@ -616,19 +566,7 @@ enum BranchType { kBranchTypeFirstCondition = eq, kBranchTypeLastCondition = nv, kBranchTypeFirstUsingReg = reg_zero, - kBranchTypeFirstUsingBit = reg_bit_clear, - - // SVE branch conditions. 
- integer_none = eq, - integer_any = ne, - integer_nlast = cs, - integer_last = cc, - integer_first = mi, - integer_nfrst = pl, - integer_pmore = hi, - integer_plast = ls, - integer_tcont = ge, - integer_tstop = lt + kBranchTypeFirstUsingBit = reg_bit_clear }; @@ -649,18 +587,6 @@ enum PreShiftImmMode { kAnyShift // Allow any pre-shift. }; -enum FPMacroNaNPropagationOption { - // The default option. This generates a run-time error in macros that respect - // this option. - NoFPMacroNaNPropagationSelected, - // For example, Fmin(result, NaN(a), NaN(b)) always selects NaN(a) if both - // NaN(a) and NaN(b) are both quiet, or both are signalling, at the - // cost of extra code generation in some cases. - StrictNaNPropagation, - // For example, Fmin(result, NaN(a), NaN(b)) selects either NaN, but using the - // fewest instructions. - FastNaNPropagation -}; class MacroAssembler : public Assembler, public MacroAssemblerInterface { public: @@ -1020,20 +946,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { void Claim(const Operand& size); void Drop(const Operand& size); - // As above, but for multiples of the SVE vector length. - void ClaimVL(int64_t multiplier) { - // We never need to worry about sp alignment because the VL is always a - // multiple of 16. - VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0); - VIXL_ASSERT(multiplier >= 0); - Addvl(sp, sp, -multiplier); - } - void DropVL(int64_t multiplier) { - VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0); - VIXL_ASSERT(multiplier >= 0); - Addvl(sp, sp, multiplier); - } - // Preserve the callee-saved registers (as defined by AAPCS64). 
// // Higher-numbered registers are pushed before lower-numbered registers, and @@ -1577,8 +1489,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { void Fmov(const VRegister& vd, const VRegister& vn) { VIXL_ASSERT(allow_macro_instructions_); SingleEmissionCheckScope guard(this); - // TODO: Use DiscardMoveMode to allow this move to be elided if vd.Is(vn). - fmov(vd, vn); + // Only emit an instruction if vd and vn are different, and they are both D + // registers. fmov(s0, s0) is not a no-op because it clears the top word of + // d0. Technically, fmov(d0, d0) is not a no-op either because it clears + // the top of q0, but VRegister does not currently support Q registers. + if (!vd.Is(vn) || !vd.Is64Bits()) { + fmov(vd, vn); + } } void Fmov(const VRegister& vd, const Register& rn) { VIXL_ASSERT(allow_macro_instructions_); @@ -1586,6 +1503,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { SingleEmissionCheckScope guard(this); fmov(vd, rn); } + void Fmov(const VRegister& vd, const XRegister& xn) { + Fmov(vd, Register(xn)); + } + void Fmov(const VRegister& vd, const WRegister& wn) { + Fmov(vd, Register(wn)); + } void Fmov(const VRegister& vd, int index, const Register& rn) { VIXL_ASSERT(allow_macro_instructions_); SingleEmissionCheckScope guard(this); @@ -3047,43 +2970,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { NEON_2VREG_SHIFT_LONG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) #undef DEFINE_MACRO_ASM_FUNC -// SVE 3 vector register instructions. 
-#define SVE_3VREG_COMMUTATIVE_MACRO_LIST(V) \ - V(add, Add) \ - V(and_, And) \ - V(bic, Bic) \ - V(eor, Eor) \ - V(mul, Mul) \ - V(orr, Orr) \ - V(sabd, Sabd) \ - V(smax, Smax) \ - V(smulh, Smulh) \ - V(smin, Smin) \ - V(uabd, Uabd) \ - V(umax, Umax) \ - V(umin, Umin) \ - V(umulh, Umulh) - -#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ - void MASM(const ZRegister& zd, \ - const PRegisterM& pg, \ - const ZRegister& zn, \ - const ZRegister& zm) { \ - VIXL_ASSERT(allow_macro_instructions_); \ - if (zd.Aliases(zn)) { \ - SingleEmissionCheckScope guard(this); \ - ASM(zd, pg, zd, zm); \ - } else if (zd.Aliases(zm)) { \ - SingleEmissionCheckScope guard(this); \ - ASM(zd, pg, zd, zn); \ - } else { \ - MovprfxHelperScope guard(this, zd, pg, zn); \ - ASM(zd, pg, zd, zm); \ - } \ - } - SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) -#undef DEFINE_MACRO_ASM_FUNC - void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) { VIXL_ASSERT(allow_macro_instructions_); SingleEmissionCheckScope guard(this); @@ -3471,2901 +3357,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { crc32cx(rd, rn, rm); } - // Scalable Vector Extensions. - void Abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - abs(zd, pg, zn); - } - void Add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - add(zd, zn, zm); - } - void Add(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - AddSubHelper(kAddImmediate, zd, zn, imm); - } - void Addpl(const Register& xd, const Register& xn, int64_t multiplier); - void Addvl(const Register& xd, const Register& xn, int64_t multiplier); - // Note that unlike the core ISA, SVE's `adr` is not PC-relative. 
- void Adr(const ZRegister& zd, const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - adr(zd, addr); - } - void And(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - and_(pd, pg, pn, pm); - } - void And(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { - and_(zd, zn, imm); - } else { - // TODO: Synthesise the immediate once 'Mov' is implemented. - VIXL_UNIMPLEMENTED(); - } - } - void And(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - SingleEmissionCheckScope guard(this); - and_(zd.VnD(), zn.VnD(), zm.VnD()); - } - void Ands(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ands(pd, pg, pn, pm); - } - void Andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - andv(vd, pg, zn); - } - void Asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - asr(zd, pg, zd, shift); - } - void Asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Asr(const ZRegister& zd, const ZRegister& zn, int shift) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - asr(zd, zn, shift); - } - void Asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - 
VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - asr(zd, zn, zm); - } - void Asrd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - asrd(zd, pg, zd, shift); - } - void Bic(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - bic(pd, pg, pn, pm); - } - void Bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - SingleEmissionCheckScope guard(this); - bic(zd.VnD(), zn.VnD(), zm.VnD()); - } - void Bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { - bic(zd, zn, imm); - } else { - // TODO: Synthesise the immediate once 'Mov' is implemented. 
- VIXL_UNIMPLEMENTED(); - } - } - void Bics(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - bics(pd, pg, pn, pm); - } - void Brka(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brka(pd, pg, pn); - } - void Brkas(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brkas(pd, pg, pn); - } - void Brkb(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brkb(pd, pg, pn); - } - void Brkbs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brkbs(pd, pg, pn); - } - void Brkn(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - if (!pd.Aliases(pm)) { - Mov(pd, pm); - } - SingleEmissionCheckScope guard(this); - brkn(pd, pg, pn, pd); - } - void Brkns(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - if (!pd.Aliases(pm)) { - Mov(pd, pm); - } - SingleEmissionCheckScope guard(this); - brkns(pd, pg, pn, pd); - } - void Brkpa(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brkpa(pd, pg, pn, pm); - } - void Brkpas(const 
PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brkpas(pd, pg, pn, pm); - } - void Brkpb(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brkpb(pd, pg, pn, pm); - } - void Brkpbs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - brkpbs(pd, pg, pn, pm); - } - void Clasta(const Register& rd, - const PRegister& pg, - const Register& rn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - clasta(rd, pg, rn, zm); - } - void Clasta(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - clasta(vd, pg, vn, zm); - } - void Clasta(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm); - void Clastb(const Register& rd, - const PRegister& pg, - const Register& rn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - clastb(rd, pg, rn, zm); - } - void Clastb(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - clastb(vd, pg, vn, zm); - } - void Clastb(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm); - void Cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cls(zd, pg, zn); 
- } - void Clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - clz(zd, pg, zn); - } - void Cmpeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmpeq(pd, pg, zn, zm); - } - void Cmpeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - int imm5; - if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { - SingleEmissionCheckScope guard(this); - cmpeq(pd, pg, zn, imm5); - } else { - CompareHelper(eq, pd, pg, zn, imm); - } - } - void Cmpge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmpge(pd, pg, zn, zm); - } - void Cmpge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - int imm5; - if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { - SingleEmissionCheckScope guard(this); - cmpge(pd, pg, zn, imm5); - } else { - CompareHelper(ge, pd, pg, zn, imm); - } - } - void Cmpgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmpgt(pd, pg, zn, zm); - } - void Cmpgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - int imm5; - if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { - SingleEmissionCheckScope guard(this); - cmpgt(pd, pg, zn, imm5); - } else { - CompareHelper(gt, pd, pg, zn, imm); - } - } - void Cmphi(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& 
zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmphi(pd, pg, zn, zm); - } - void Cmphi(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - if (imm.IsUintN(7)) { - SingleEmissionCheckScope guard(this); - cmphi(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); - } else { - CompareHelper(hi, pd, pg, zn, imm); - } - } - void Cmphs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmphs(pd, pg, zn, zm); - } - void Cmphs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - if (imm.IsUintN(7)) { - SingleEmissionCheckScope guard(this); - cmphs(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); - } else { - CompareHelper(hs, pd, pg, zn, imm); - } - } - void Cmple(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmple(pd, pg, zn, zm); - } - void Cmple(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - int imm5; - if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { - SingleEmissionCheckScope guard(this); - cmple(pd, pg, zn, imm5); - } else { - CompareHelper(le, pd, pg, zn, imm); - } - } - void Cmplo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmplo(pd, pg, zn, zm); - } - void Cmplo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - if (imm.IsUintN(7)) { - SingleEmissionCheckScope guard(this); - 
cmplo(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); - } else { - CompareHelper(lo, pd, pg, zn, imm); - } - } - void Cmpls(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmpls(pd, pg, zn, zm); - } - void Cmpls(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - if (imm.IsUintN(7)) { - SingleEmissionCheckScope guard(this); - cmpls(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); - } else { - CompareHelper(ls, pd, pg, zn, imm); - } - } - void Cmplt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmplt(pd, pg, zn, zm); - } - void Cmplt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - int imm5; - if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { - SingleEmissionCheckScope guard(this); - cmplt(pd, pg, zn, imm5); - } else { - CompareHelper(lt, pd, pg, zn, imm); - } - } - void Cmpne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cmpne(pd, pg, zn, zm); - } - void Cmpne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - int imm5; - if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { - SingleEmissionCheckScope guard(this); - cmpne(pd, pg, zn, imm5); - } else { - CompareHelper(ne, pd, pg, zn, imm); - } - } - void Cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cnot(zd, pg, zn); - } - void Cnt(const 
ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cnt(zd, pg, zn); - } - void Cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cntb(rd, pattern, multiplier); - } - void Cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cntd(rd, pattern, multiplier); - } - void Cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cnth(rd, pattern, multiplier); - } - void Cntp(const Register& rd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - // The `cntp` instruction architecturally takes an X register, but the - // result will always be in the range [0, kPRegMaxSize] (and therefore - // always fits in a W register), so we can accept a W-sized rd here. 
- cntp(rd.X(), pg, pn); - } - void Cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cntw(rd, pattern, multiplier); - } - void Compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - compact(zd, pg, zn); - } - void Cpy(const ZRegister& zd, const PRegister& pg, IntegerOperand imm); - void Cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cpy(zd, pg, rn); - } - void Cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - cpy(zd, pg, vn); - } - void Ctermeq(const Register& rn, const Register& rm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ctermeq(rn, rm); - } - void Ctermne(const Register& rn, const Register& rm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ctermne(rn, rm); - } - void Decb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - decb(rdn, pattern, multiplier); - } - void Decd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - decd(rdn, pattern, multiplier); - } - void Decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - decd(zdn, pattern, multiplier); - } - void Dech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - dech(rdn, pattern, multiplier); - } - void Dech(const ZRegister& 
zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - dech(zdn, pattern, multiplier); - } - void Decp(const Register& rdn, const PRegisterWithLaneSize& pg) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - decp(rdn, pg); - } - void Decp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameFormat(zd, zn)); - // `decp` writes every lane, so use an unpredicated movprfx. - MovprfxHelperScope guard(this, zd, zn); - decp(zd, pg); - } - void Decp(const ZRegister& zdn, const PRegister& pg) { Decp(zdn, pg, zdn); } - void Decw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - decw(rdn, pattern, multiplier); - } - void Decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - decw(zdn, pattern, multiplier); - } - void Dup(const ZRegister& zd, const Register& xn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - dup(zd, xn); - } - void Dup(const ZRegister& zd, const ZRegister& zn, int index) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - dup(zd, zn, index); - } - void Dup(const ZRegister& zd, IntegerOperand imm); - void Eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { - eon(zd, zn, imm); - } else { - // TODO: Synthesise the immediate once 'Mov' is implemented. 
- VIXL_UNIMPLEMENTED(); - } - } - void Eor(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - eor(pd, pg, pn, pm); - } - void Eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { - eor(zd, zn, imm); - } else { - // TODO: Synthesise the immediate once 'Mov' is implemented. - VIXL_UNIMPLEMENTED(); - } - } - void Eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - SingleEmissionCheckScope guard(this); - eor(zd.VnD(), zn.VnD(), zm.VnD()); - } - void Eors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - eors(pd, pg, pn, pm); - } - void Eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - eorv(vd, pg, zn); - } - void Ext(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned offset); - void Fabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option); - void Fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fabs(zd, pg, zn); - } - void Facge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - facge(pd, pg, zn, zm); - } - void Facgt(const PRegisterWithLaneSize& pd, - const 
PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - facgt(pd, pg, zn, zm); - } - void Facle(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - facge(pd, pg, zm, zn); - } - void Faclt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - facgt(pd, pg, zm, zn); - } - void Fadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fadd(zd, pg, zd, imm); - } - void Fadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option); - void Fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fadd(zd, zn, zm); - } - void Fadda(const VRegister& vd, - const PRegister& pg, - const VRegister& vn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fadda(vd, pg, vn, zm); - } - void Faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - faddv(vd, pg, zn); - } - void Fcadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - int rot); - void Fcmeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (zero == 0.0) { - fcmeq(pd, pg, zn, zero); - } else { - // TODO: Synthesise other immediates. 
- VIXL_UNIMPLEMENTED(); - } - } - void Fcmeq(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmeq(pd, pg, zn, zm); - } - void Fcmge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (zero == 0.0) { - fcmge(pd, pg, zn, zero); - } else { - // TODO: Synthesise other immediates. - VIXL_UNIMPLEMENTED(); - } - } - void Fcmge(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmge(pd, pg, zn, zm); - } - void Fcmgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (zero == 0.0) { - fcmgt(pd, pg, zn, zero); - } else { - // TODO: Synthesise other immediates. 
- VIXL_UNIMPLEMENTED(); - } - } - void Fcmgt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmgt(pd, pg, zn, zm); - } - void Fcmla(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - int rot) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zda, pg, zda); - fcmla(zda, pg, zn, zm, rot); - } - void Fcmla(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index, - int rot) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmla(zda, zn, zm, index, rot); - } - void Fcmle(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (zero == 0.0) { - fcmle(pd, pg, zn, zero); - } else { - // TODO: Synthesise other immediates. - VIXL_UNIMPLEMENTED(); - } - } - void Fcmle(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmge(pd, pg, zm, zn); - } - void Fcmlt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (zero == 0.0) { - fcmlt(pd, pg, zn, zero); - } else { - // TODO: Synthesise other immediates. 
- VIXL_UNIMPLEMENTED(); - } - } - void Fcmlt(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmgt(pd, pg, zm, zn); - } - void Fcmne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - double zero) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (zero == 0.0) { - fcmne(pd, pg, zn, zero); - } else { - // TODO: Synthesise other immediates. - VIXL_UNIMPLEMENTED(); - } - } - void Fcmne(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmne(pd, pg, zn, zm); - } - void Fcmuo(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcmuo(pd, pg, zn, zm); - } - void Fcpy(const ZRegister& zd, const PRegisterM& pg, double imm); - void Fcpy(const ZRegister& zd, const PRegisterM& pg, float imm); - void Fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm); - void Fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcvt(zd, pg, zn); - } - void Fcvt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - // The element type in this predicated movprfx is determined by the larger - // type between the source and destination. 
- int lane_size = std::max(zd.GetLaneSizeInBits(), zn.GetLaneSizeInBits()); - MovprfxHelperScope guard(this, - zd.WithLaneSize(lane_size), - pg, - zn.WithLaneSize(lane_size)); - fcvt(zd, pg.Merging(), zn); - } - void Fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcvtzs(zd, pg, zn); - } - void Fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fcvtzu(zd, pg, zn); - } - void Fdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Fdup(const ZRegister& zd, double imm); - void Fdup(const ZRegister& zd, float imm); - void Fdup(const ZRegister& zd, Float16 imm); - void Fexpa(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fexpa(zd, zn); - } - void Fmad(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fmad(zdn, pg, zm, za); - } - void Fmax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fmax(zd, pg, zd, imm); - } - void Fmax( - const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); - void Fmaxnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fmaxnm(zd, pg, zd, imm); - } - void Fmaxnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option); - void Fmaxnmv(const VRegister& 
vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fmaxnmv(vd, pg, zn); - } - void Fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fmaxv(vd, pg, zn); - } - void Fmin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fmin(zd, pg, zd, imm); - } - void Fmin( - const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); - void Fminnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fminnm(zd, pg, zd, imm); - } - void Fminnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option); - void Fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fminnmv(vd, pg, zn); - } - void Fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fminv(vd, pg, zn); - } - // zd = za + (zn * zm) - void Fmla( - const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); - void Fmla(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); - // zd = za - (zn * zm) - void Fmls( - const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - 
FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); - void Fmls(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); - void Fmov(const ZRegister& zd, double imm) { - VIXL_ASSERT(allow_macro_instructions_); - Fdup(zd, imm); - } - void Fmov(const ZRegister& zd, float imm) { - VIXL_ASSERT(allow_macro_instructions_); - Fdup(zd, imm); - } - void Fmov(const ZRegister& zd, Float16 imm) { - VIXL_ASSERT(allow_macro_instructions_); - Fdup(zd, imm); - } - void Fmov(const ZRegister& zd, const PRegisterM& pg, double imm) { - VIXL_ASSERT(allow_macro_instructions_); - Fcpy(zd, pg, imm); - } - void Fmov(const ZRegister& zd, const PRegisterM& pg, float imm) { - VIXL_ASSERT(allow_macro_instructions_); - Fcpy(zd, pg, imm); - } - void Fmov(const ZRegister& zd, const PRegisterM& pg, Float16 imm) { - VIXL_ASSERT(allow_macro_instructions_); - Fcpy(zd, pg, imm); - } - void Fmsb(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zm, - const ZRegister& za) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fmsb(zdn, pg, zm, za); - } - void Fmul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fmul(zd, pg, zd, imm); - } - void Fmul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option); - void Fmul(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned index) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fmul(zd, zn, zm, index); - } - void Fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fmul(zd, zn, zm); - } - void Fmulx(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& 
zm, - FPMacroNaNPropagationOption nan_option); - void Fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fneg(zd, pg, zn); - } - void Fnmla( - const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); - void Fnmls( - const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); - void Frecpe(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frecpe(zd, zn); - } - void Frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frecps(zd, zn, zm); - } - void Frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frecpx(zd, pg, zn); - } - void Frecpx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frecpx(zd, pg.Merging(), zn); - } - void Frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frinta(zd, pg, zn); - } - void Frinta(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frinta(zd, pg.Merging(), zn); - } - void Frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frinti(zd, pg, zn); - } - void Frinti(const ZRegister& 
zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frinti(zd, pg.Merging(), zn); - } - void Frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frintm(zd, pg, zn); - } - void Frintm(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frintm(zd, pg.Merging(), zn); - } - void Frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frintn(zd, pg, zn); - } - void Frintn(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frintn(zd, pg.Merging(), zn); - } - void Frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frintp(zd, pg, zn); - } - void Frintp(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frintp(zd, pg.Merging(), zn); - } - void Frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frintx(zd, pg, zn); - } - void Frintx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frintx(zd, pg.Merging(), zn); - } - void Frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frintz(zd, pg, zn); - } - void Frintz(const ZRegister& zd, const PRegisterZ& pg, const 
ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - frintz(zd, pg.Merging(), zn); - } - void Frsqrte(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frsqrte(zd, zn); - } - void Frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - frsqrts(zd, zn, zm); - } - void Fscale(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fsqrt(zd, pg, zn); - } - void Fsqrt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fsqrt(zd, pg.Merging(), zn); - } - void Fsub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fsub(zd, pg, zd, imm); - } - void Fsub(const ZRegister& zd, - const PRegisterM& pg, - double imm, - const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - fsubr(zd, pg, zd, imm); - } - void Fsub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - fsub(zd, zn, zm); - } - void Ftmad(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - int imm3); - void Ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ftsmul(zd, zn, zm); - } - void 
Ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ftssel(zd, zn, zm); - } - void Incb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - incb(rdn, pattern, multiplier); - } - void Incd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - incd(rdn, pattern, multiplier); - } - void Incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - incd(zdn, pattern, multiplier); - } - void Inch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - inch(rdn, pattern, multiplier); - } - void Inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - inch(zdn, pattern, multiplier); - } - void Incp(const Register& rdn, const PRegisterWithLaneSize& pg) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - incp(rdn, pg); - } - void Incp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameFormat(zd, zn)); - // `incp` writes every lane, so use an unpredicated movprfx. 
- MovprfxHelperScope guard(this, zd, zn); - incp(zd, pg); - } - void Incp(const ZRegister& zdn, const PRegister& pg) { Incp(zdn, pg, zdn); } - void Incw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - incw(rdn, pattern, multiplier); - } - void Incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - incw(zdn, pattern, multiplier); - } - void Index(const ZRegister& zd, const Operand& start, const Operand& step); - void Insr(const ZRegister& zdn, const Register& rm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - insr(zdn, rm); - } - void Insr(const ZRegister& zdn, const VRegister& vm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - insr(zdn, vm); - } - void Insr(const ZRegister& zdn, IntegerOperand imm); - void Lasta(const Register& rd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lasta(rd, pg, zn); - } - void Lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lasta(vd, pg, zn); - } - void Lastb(const Register& rd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lastb(rd, pg, zn); - } - void Lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lastb(vd, pg, zn); - } - void Ld1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void 
Ld1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1rb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadBroadcastImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rb, - kBRegSizeInBytes); - } - void Ld1rh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadBroadcastImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rh, - kHRegSizeInBytes); - } - void Ld1rw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadBroadcastImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rw, - kSRegSizeInBytes); - } - void Ld1rd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadBroadcastImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rd, - kDRegSizeInBytes); - } - void Ld1rqb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1rqd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1rqh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1rqw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1rsb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadBroadcastImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rsb, - kBRegSizeInBytes); - } - void Ld1rsh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadBroadcastImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rsh, - kHRegSizeInBytes); - } - void Ld1rsw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadBroadcastImmHelper(zt, - pg, - addr, - 
&MacroAssembler::ld1rsw, - kSRegSizeInBytes); - } - void Ld1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ld2b(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld2b(zt1, zt2, pg, addr); - } - void Ld2h(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld2h(zt1, zt2, pg, addr); - } - void Ld2w(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld2w(zt1, zt2, pg, addr); - } - void Ld2d(const ZRegister& zt1, - const ZRegister& zt2, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld2d(zt1, zt2, pg, addr); - } - void Ld3b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld3b(zt1, zt2, zt3, pg, addr); - } - void Ld3h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld3h(zt1, zt2, zt3, pg, addr); - } - void Ld3w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld3w(zt1, zt2, zt3, pg, addr); - } - void 
Ld3d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld3d(zt1, zt2, zt3, pg, addr); - } - void Ld4b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld4b(zt1, zt2, zt3, zt4, pg, addr); - } - void Ld4h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld4h(zt1, zt2, zt3, zt4, pg, addr); - } - void Ld4w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld4w(zt1, zt2, zt3, zt4, pg, addr); - } - void Ld4d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ld4d(zt1, zt2, zt3, zt4, pg, addr); - } - void Ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const 
SVEMemOperand& addr); - void Ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1b(zt, pg, xn, zm); - } - void Ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1b(zt, pg, zn, imm5); - } - void Ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1d(zt, pg, xn, zm); - } - void Ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1d(zt, pg, zn, imm5); - } - void Ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1h(zt, pg, xn, zm); - } - void Ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1h(zt, pg, zn, imm5); - } - void Ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1sb(zt, pg, xn, zm); - } - void Ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1sb(zt, pg, zn, imm5); - } - void Ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1sh(zt, pg, xn, zm); - } - void Ldff1sh(const ZRegister& zt, - 
const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1sh(zt, pg, zn, imm5); - } - void Ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1sw(zt, pg, xn, zm); - } - void Ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1sw(zt, pg, zn, imm5); - } - void Ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const Register& xn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1w(zt, pg, xn, zm); - } - void Ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const ZRegister& zn, - int imm5) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldff1w(zt, pg, zn, imm5); - } - void Ldnf1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldnf1b(zt, pg, addr); - } - void Ldnf1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldnf1d(zt, pg, addr); - } - void Ldnf1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldnf1h(zt, pg, addr); - } - void Ldnf1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldnf1sb(zt, pg, addr); - } - void Ldnf1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope 
guard(this); - ldnf1sh(zt, pg, addr); - } - void Ldnf1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldnf1sw(zt, pg, addr); - } - void Ldnf1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ldnf1w(zt, pg, addr); - } - void Ldnt1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldnt1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldnt1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldnt1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - void Ldr(const CPURegister& rt, const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::ldr); - } - void Lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - lsl(zd, pg, zd, shift); - } - void Lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Lsl(const ZRegister& zd, const ZRegister& zn, int shift) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lsl(zd, zn, shift); - } - void Lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lsl(zd, zn, zm); - } - void Lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - int shift) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, pg, zn); - lsr(zd, pg, zd, shift); - } - void Lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Lsr(const ZRegister& zd, const 
ZRegister& zn, int shift) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lsr(zd, zn, shift); - } - void Lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - lsr(zd, zn, zm); - } - void Mov(const PRegister& pd, const PRegister& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(pd.VnB(), pn.VnB()); - } - void Mov(const PRegisterWithLaneSize& pd, - const PRegisterM& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(pd, pg, pn); - } - void Mov(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(pd, pg, pn); - } - void Mov(const ZRegister& zd, const Register& xn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(zd, xn); - } - - void Mov(const ZRegister& zd, const VRegister& vn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(zd, vn); - } - - void Mov(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(zd, zn); - } - void Mov(const ZRegister& zd, const ZRegister& zn, unsigned index) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(zd, zn, index); - } - void Mov(const ZRegister& zd, const PRegister& pg, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - Cpy(zd, pg, imm); - } - // TODO: support zeroing predicated moves using movprfx. 
- void Mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(zd, pg, rn); - } - void Mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(zd, pg, vn); - } - void Mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - mov(zd, pg, zn); - } - void Mov(const ZRegister& zd, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - Dup(zd, imm); - } - void Movs(const PRegister& pd, const PRegister& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - movs(pd, pn); - } - void Movs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - movs(pd, pg, pn); - } - // zd = za + (zn * zm) - void Mla(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm); - // zd = za - (zn * zm) - void Mls(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm); - void Mul(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); - void Nand(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - nand(pd, pg, pn, pm); - } - void Nands(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - nands(pd, pg, pn, pm); - } - // There is no instruction with this form, but we can implement it using - // `subr`. 
- void Neg(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - MovprfxHelperScope guard(this, zd, zn); - subr(zd, zd, 0); - } - void Neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - neg(zd, pg, zn); - } - void Nor(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - nor(pd, pg, pn, pm); - } - void Nors(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - nors(pd, pg, pn, pm); - } - void Not(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - not_(pd, pg, pn); - } - void Not(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - not_(zd, pg, zn); - } - void Nots(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - nots(pd, pg, pn); - } - void Orn(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - orn(pd, pg, pn, pm); - } - void Orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { - orn(zd, zn, imm); - } else { - // TODO: Synthesise the immediate once 'Mov' is implemented. 
- VIXL_UNIMPLEMENTED(); - } - } - void Orns(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - orns(pd, pg, pn, pm); - } - void Orr(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - orr(pd, pg, pn, pm); - } - void Orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { - orr(zd, zn, imm); - } else { - // TODO: Synthesise the immediate once 'Mov' is implemented. - VIXL_UNIMPLEMENTED(); - } - } - void Orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); - SingleEmissionCheckScope guard(this); - orr(zd.VnD(), zn.VnD(), zm.VnD()); - } - void Orrs(const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - orrs(pd, pg, pn, pm); - } - void Orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - orv(vd, pg, zn); - } - void Pfalse(const PRegister& pd) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(pd.IsUnqualified()); - SingleEmissionCheckScope guard(this); - // No matter what the lane size is, overall this operation just writes zeros - // throughout the register. 
- pfalse(pd.VnB()); - } - void Pfirst(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn); - void Pnext(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn); - void Prfb(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - prfb(prfop, pg, addr); - } - void Prfh(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - prfh(prfop, pg, addr); - } - void Prfw(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - prfw(prfop, pg, addr); - } - void Prfd(PrefetchOperation prfop, - const PRegister& pg, - const SVEMemOperand addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - prfd(prfop, pg, addr); - } - void Ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ptest(pg, pn); - } - void Ptrue(const PRegisterWithLaneSize& pd, - SVEPredicateConstraint pattern, - FlagsUpdate s); - void Ptrue(const PRegisterWithLaneSize& pd, - SVEPredicateConstraint pattern = SVE_ALL) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ptrue(pd, pattern); - } - void Ptrues(const PRegisterWithLaneSize& pd, - SVEPredicateConstraint pattern = SVE_ALL) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - ptrues(pd, pattern); - } - void Punpkhi(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - punpkhi(pd, pn); - } - void Punpklo(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn) { - 
VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - punpklo(pd, pn); - } - void Rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - rbit(zd, pg, zn); - } - void Rdffr(const PRegister& pd) { - VIXL_ASSERT(allow_macro_instructions_); - // Although this is essentially just a move, it writes every bit and so can - // only support b-sized lane because other lane sizes would simplicity clear - // bits in `pd`. - VIXL_ASSERT(!pd.HasLaneSize() || pd.IsLaneSizeB()); - VIXL_ASSERT(pd.IsUnqualified()); - SingleEmissionCheckScope guard(this); - rdffr(pd.VnB()); - } - void Rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - rdffr(pd, pg); - } - void Rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - rdffrs(pd, pg); - } - // Note that there is no `rdpl` instruction, but this macro emulates it (for - // symmetry with `Rdvl`). 
- void Rdpl(const Register& xd, int64_t multiplier) { - VIXL_ASSERT(allow_macro_instructions_); - Addpl(xd, xzr, multiplier); - } - void Rdvl(const Register& xd, int64_t multiplier) { - VIXL_ASSERT(allow_macro_instructions_); - Addvl(xd, xzr, multiplier); - } - void Rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - rev(pd, pn); - } - void Rev(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - rev(zd, zn); - } - void Revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - revb(zd, pg, zn); - } - void Revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - revh(zd, pg, zn); - } - void Revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - revw(zd, pg, zn); - } - void Saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - saddv(dd, pg, zn); - } - void Scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - scvtf(zd, pg, zn); - } - void Sdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Sdot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm); - void Sdot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); - void Sel(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - 
VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sel(pd, pg, pn, pm); - } - void Sel(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sel(zd, pg, zn, zm); - } - void Setffr() { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - setffr(); - } - void Smax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); - void Smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - smaxv(vd, pg, zn); - } - void Smin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); - void Sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sminv(vd, pg, zn); - } - void Splice(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm); - void Sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqadd(zd, zn, zm); - } - void Sqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.IsUint8() || - (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); - MovprfxHelperScope guard(this, zd, zn); - sqadd(zd, zd, imm.AsUint16()); - } - void Sqdecb(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecb(xd, wn, pattern, multiplier); - } - void Sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecb(rdn, pattern, multiplier); - } - void Sqdecd(const Register& xd, - 
const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecd(xd, wn, pattern, multiplier); - } - void Sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecd(rdn, pattern, multiplier); - } - void Sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecd(zdn, pattern, multiplier); - } - void Sqdech(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdech(xd, wn, pattern, multiplier); - } - void Sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdech(rdn, pattern, multiplier); - } - void Sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdech(zdn, pattern, multiplier); - } - void Sqdecp(const Register& xdn, - const PRegisterWithLaneSize& pg, - const Register& wdn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecp(xdn, pg, wdn); - } - void Sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecp(xdn, pg); - } - void Sqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameFormat(zd, zn)); - // `sqdecp` writes every lane, so use an unpredicated movprfx. 
- MovprfxHelperScope guard(this, zd, zn); - sqdecp(zd, pg); - } - void Sqdecp(const ZRegister& zdn, const PRegister& pg) { - Sqdecp(zdn, pg, zdn); - } - void Sqdecw(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecw(xd, wn, pattern, multiplier); - } - void Sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecw(rdn, pattern, multiplier); - } - void Sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqdecw(zdn, pattern, multiplier); - } - void Sqincb(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincb(xd, wn, pattern, multiplier); - } - void Sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincb(rdn, pattern, multiplier); - } - void Sqincd(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincd(xd, wn, pattern, multiplier); - } - void Sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincd(rdn, pattern, multiplier); - } - void Sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincd(zdn, pattern, multiplier); - } - void Sqinch(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); 
- SingleEmissionCheckScope guard(this); - sqinch(xd, wn, pattern, multiplier); - } - void Sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqinch(rdn, pattern, multiplier); - } - void Sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqinch(zdn, pattern, multiplier); - } - void Sqincp(const Register& xdn, - const PRegisterWithLaneSize& pg, - const Register& wdn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincp(xdn, pg, wdn); - } - void Sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincp(xdn, pg); - } - void Sqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameFormat(zd, zn)); - // `sqincp` writes every lane, so use an unpredicated movprfx. 
- MovprfxHelperScope guard(this, zd, zn); - sqincp(zd, pg); - } - void Sqincp(const ZRegister& zdn, const PRegister& pg) { - Sqincp(zdn, pg, zdn); - } - void Sqincw(const Register& xd, - const Register& wn, - int pattern = SVE_ALL, - int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincw(xd, wn, pattern, multiplier); - } - void Sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincw(rdn, pattern, multiplier); - } - void Sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqincw(zdn, pattern, multiplier); - } - void Sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sqsub(zd, zn, zm); - } - void Sqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.IsUint8() || - (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); - MovprfxHelperScope guard(this, zd, zn); - sqsub(zd, zd, imm.AsUint16()); - } - void St1b(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void St1h(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void St1w(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void St1d(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void St2b(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st2b(zt1, zt2, pg, addr); - } - void St2h(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - 
SingleEmissionCheckScope guard(this); - st2h(zt1, zt2, pg, addr); - } - void St2w(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st2w(zt1, zt2, pg, addr); - } - void St2d(const ZRegister& zt1, - const ZRegister& zt2, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st2d(zt1, zt2, pg, addr); - } - void St3b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st3b(zt1, zt2, zt3, pg, addr); - } - void St3h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st3h(zt1, zt2, zt3, pg, addr); - } - void St3w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st3w(zt1, zt2, zt3, pg, addr); - } - void St3d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st3d(zt1, zt2, zt3, pg, addr); - } - void St4b(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st4b(zt1, zt2, zt3, zt4, pg, addr); - } - void St4h(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr) { - 
VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st4h(zt1, zt2, zt3, zt4, pg, addr); - } - void St4w(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st4w(zt1, zt2, zt3, zt4, pg, addr); - } - void St4d(const ZRegister& zt1, - const ZRegister& zt2, - const ZRegister& zt3, - const ZRegister& zt4, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - st4d(zt1, zt2, zt3, zt4, pg, addr); - } - void Stnt1b(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void Stnt1d(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void Stnt1h(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void Stnt1w(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - void Str(const CPURegister& rt, const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::str); - } - void Sub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sub(zd, zn, zm); - } - void Sub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - AddSubHelper(kSubImmediate, zd, zn, imm); - } - void Sub(const ZRegister& zd, IntegerOperand imm, const ZRegister& zm); - void Sunpkhi(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sunpkhi(zd, zn); - } - void Sunpklo(const ZRegister& zd, const ZRegister& zn) { - 
VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sunpklo(zd, zn); - } - void Sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sxtb(zd, pg, zn); - } - void Sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sxth(zd, pg, zn); - } - void Sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - sxtw(zd, pg, zn); - } - void Tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - tbl(zd, zn, zm); - } - void Trn1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - trn1(pd, pn, pm); - } - void Trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - trn1(zd, zn, zm); - } - void Trn2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - trn2(pd, pn, pm); - } - void Trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - trn2(zd, zn, zm); - } - void Uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uaddv(dd, pg, zn); - } - void Ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - 
ucvtf(zd, pg, zn); - } - void Udiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - void Udot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm); - void Udot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); - void Umax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); - void Umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - umaxv(vd, pg, zn); - } - void Umin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); - void Uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uminv(vd, pg, zn); - } - void Uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqadd(zd, zn, zm); - } - void Uqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.IsUint8() || - (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); - MovprfxHelperScope guard(this, zd, zn); - uqadd(zd, zd, imm.AsUint16()); - } - void Uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqdecb(rdn, pattern, multiplier); - } - void Uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqdecd(rdn, pattern, multiplier); - } - void Uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqdecd(zdn, pattern, multiplier); - } - void Uqdech(const Register& rdn, int pattern = SVE_ALL, 
int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqdech(rdn, pattern, multiplier); - } - void Uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqdech(zdn, pattern, multiplier); - } - // The saturation is based on the size of `rn`. The result is zero-extended - // into `rd`, which must be at least as big. - void Uqdecp(const Register& rd, - const PRegisterWithLaneSize& pg, - const Register& rn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(rd.Aliases(rn)); - VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); - SingleEmissionCheckScope guard(this); - if (rn.Is64Bits()) { - uqdecp(rd, pg); - } else { - // Convert <Xd> into <Wd>, to make this more consistent with Sqdecp. - uqdecp(rd.W(), pg); - } - } - void Uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { - Uqdecp(rdn, pg, rdn); - } - void Uqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameFormat(zd, zn)); - // `sqdecp` writes every lane, so use an unpredicated movprfx. 
- MovprfxHelperScope guard(this, zd, zn); - uqdecp(zd, pg); - } - void Uqdecp(const ZRegister& zdn, const PRegister& pg) { - Uqdecp(zdn, pg, zdn); - } - void Uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqdecw(rdn, pattern, multiplier); - } - void Uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqdecw(zdn, pattern, multiplier); - } - void Uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqincb(rdn, pattern, multiplier); - } - void Uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqincd(rdn, pattern, multiplier); - } - void Uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqincd(zdn, pattern, multiplier); - } - void Uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqinch(rdn, pattern, multiplier); - } - void Uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqinch(zdn, pattern, multiplier); - } - // The saturation is based on the size of `rn`. The result is zero-extended - // into `rd`, which must be at least as big. 
- void Uqincp(const Register& rd, - const PRegisterWithLaneSize& pg, - const Register& rn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(rd.Aliases(rn)); - VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); - SingleEmissionCheckScope guard(this); - if (rn.Is64Bits()) { - uqincp(rd, pg); - } else { - // Convert <Xd> into <Wd>, to make this more consistent with Sqincp. - uqincp(rd.W(), pg); - } - } - void Uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { - Uqincp(rdn, pg, rdn); - } - void Uqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameFormat(zd, zn)); - // `sqincp` writes every lane, so use an unpredicated movprfx. - MovprfxHelperScope guard(this, zd, zn); - uqincp(zd, pg); - } - void Uqincp(const ZRegister& zdn, const PRegister& pg) { - Uqincp(zdn, pg, zdn); - } - void Uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqincw(rdn, pattern, multiplier); - } - void Uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqincw(zdn, pattern, multiplier); - } - void Uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uqsub(zd, zn, zm); - } - void Uqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.IsUint8() || - (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); - MovprfxHelperScope guard(this, zd, zn); - uqsub(zd, zd, imm.AsUint16()); - } - void Uunpkhi(const ZRegister& zd, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uunpkhi(zd, zn); - } - void Uunpklo(const ZRegister& zd, const ZRegister& zn) { - 
VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uunpklo(zd, zn); - } - void Uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uxtb(zd, pg, zn); - } - void Uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uxth(zd, pg, zn); - } - void Uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uxtw(zd, pg, zn); - } - void Uzp1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uzp1(pd, pn, pm); - } - void Uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uzp1(zd, zn, zm); - } - void Uzp2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uzp2(pd, pn, pm); - } - void Uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - uzp2(zd, zn, zm); - } - void Whilele(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - whilele(pd, rn, rm); - } - void Whilelo(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - whilelo(pd, rn, rm); - } - void Whilels(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - 
VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - whilels(pd, rn, rm); - } - void Whilelt(const PRegisterWithLaneSize& pd, - const Register& rn, - const Register& rm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - whilelt(pd, rn, rm); - } - void Wrffr(const PRegister& pn) { - VIXL_ASSERT(allow_macro_instructions_); - // Although this is essentially just a move, it writes every bit and so can - // only support b-sized lane because other lane sizes would implicitly clear - // bits in `ffr`. - VIXL_ASSERT(!pn.HasLaneSize() || pn.IsLaneSizeB()); - VIXL_ASSERT(pn.IsUnqualified()); - SingleEmissionCheckScope guard(this); - wrffr(pn.VnB()); - } - void Zip1(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - zip1(pd, pn, pm); - } - void Zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - zip1(zd, zn, zm); - } - void Zip2(const PRegisterWithLaneSize& pd, - const PRegisterWithLaneSize& pn, - const PRegisterWithLaneSize& pm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - zip2(pd, pn, pm); - } - void Zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SingleEmissionCheckScope guard(this); - zip2(zd, zn, zm); - } - template <typename T> Literal<T>* CreateLiteralDestroyedWithPool(T value) { return new Literal<T>(value, @@ -6489,13 +3480,11 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { return GetScratchRegisterList(); } - CPURegList* GetScratchVRegisterList() { return &v_tmp_list_; } + CPURegList* GetScratchVRegisterList() { return &fptmp_list_; } VIXL_DEPRECATED("GetScratchVRegisterList", CPURegList* FPTmpList()) { return 
GetScratchVRegisterList(); } - CPURegList* GetScratchPRegisterList() { return &p_tmp_list_; } - // Get or set the current (most-deeply-nested) UseScratchRegisterScope. void SetCurrentScratchRegisterScope(UseScratchRegisterScope* scope) { current_scratch_scope_ = scope; @@ -6559,6 +3548,16 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // Will output the flags. void Log(TraceParameters parameters); + // Enable or disable instrumentation when an Instrument visitor is attached to + // the simulator. + void EnableInstrumentation(); + void DisableInstrumentation(); + + // Add a marker to the instrumentation data produced by an Instrument visitor. + // The name is a two character string that will be attached to the marker in + // the output data. + void AnnotateInstrumentation(const char* marker_name); + // Enable or disable CPU features dynamically. This mechanism allows users to // strictly check the use of CPU features in different regions of code. void SetSimulatorCPUFeatures(const CPUFeatures& features); @@ -6662,36 +3661,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { Condition cond, bool* should_synthesise_left); - // Generate code to calculate the address represented by `addr` and write it - // into `xd`. This is used as a common fall-back for out-of-range load and - // store operands. - // - // The vl_divisor_log2 argument is used to scale the VL, for use with - // SVE_MUL_VL. - void CalculateSVEAddress(const Register& xd, - const SVEMemOperand& addr, - int vl_divisor_log2 = 0); - - void CalculateSVEAddress(const Register& xd, - const SVEMemOperand& addr, - const CPURegister& rt) { - VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); - int vl_divisor_log2 = rt.IsPRegister() ? 
kZRegBitsPerPRegBitLog2 : 0; - CalculateSVEAddress(xd, addr, vl_divisor_log2); - } - - void SetFPNaNPropagationOption(FPMacroNaNPropagationOption nan_option) { - fp_nan_propagation_ = nan_option; - } - - void ResolveFPNaNPropagationOption(FPMacroNaNPropagationOption* nan_option) { - // The input option has priority over the option that has set. - if (*nan_option == NoFPMacroNaNPropagationSelected) { - *nan_option = fp_nan_propagation_; - } - VIXL_ASSERT(*nan_option != NoFPMacroNaNPropagationSelected); - } - private: // The actual Push and Pop implementations. These don't generate any code // other than that required for the push or pop. This allows @@ -6745,183 +3714,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { void ConfigureSimulatorCPUFeaturesHelper(const CPUFeatures& features, DebugHltOpcode action); - void CompareHelper(Condition cond, - const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm); - - // E.g. Ld1rb. - typedef void (Assembler::*SVELoadBroadcastFn)(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - - void SVELoadBroadcastImmHelper(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - SVELoadBroadcastFn fn, - int divisor); - - // E.g. ldr/str - typedef void (Assembler::*SVELoadStoreFn)(const CPURegister& rt, - const SVEMemOperand& addr); - - void SVELoadStoreScalarImmHelper(const CPURegister& rt, - const SVEMemOperand& addr, - SVELoadStoreFn fn); - - typedef void (Assembler::*SVELoad1Fn)(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr); - typedef void (Assembler::*SVEStore1Fn)(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr); - - // Helper for predicated Z register loads with addressing modes not directly - // encodable in the instruction. The supported_modifier parameter indicates - // which offset modifier the calling instruction encoder supports (eg. - // SVE_MUL_VL). 
The ratio log2 of VL to memory access size is passed as - // vl_divisor_log2; pass -1 to indicate no dependency. - template <typename Tg, typename Tf> - void SVELoadStoreScalarImmHelper( - const ZRegister& zt, - const Tg& pg, - const SVEMemOperand& addr, - Tf fn, - int imm_bits, - int shift_amount, - SVEOffsetModifier supported_modifier = NO_SVE_OFFSET_MODIFIER, - int vl_divisor_log2 = 0); - - template <typename Tg, typename Tf> - void SVELoadStore1Helper(int msize_in_bytes_log2, - const ZRegister& zt, - const Tg& pg, - const SVEMemOperand& addr, - Tf fn); - - template <typename Tf> - void SVELoadFFHelper(int msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - Tf fn); - - typedef void (MacroAssembler::*IntWideImmMacroFn)(const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm); - - typedef void (Assembler::*IntWideImmShiftFn)(const ZRegister& zd, - const ZRegister& zn, - int imm, - int shift); - - typedef void (Assembler::*IntArithFn)(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm); - - typedef void (Assembler::*IntWideImmFn)(const ZRegister& zd, - const ZRegister& zn, - int imm); - - typedef void (Assembler::*IntArithIndexFn)(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - int index); - - typedef void (MacroAssembler::*SVEArithPredicatedFn)(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - void IntWideImmHelper(IntWideImmFn imm_fn, - SVEArithPredicatedFn reg_fn, - const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm, - bool is_signed_imm); - - enum AddSubHelperOption { kAddImmediate, kSubImmediate }; - - void AddSubHelper(AddSubHelperOption option, - const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm); - - // Try to emit an add- or sub-like instruction (imm_fn) with `imm`, or the - // corresponding sub- or add-like instruction (n_imm_fn) with a negated `imm`. 
- // A `movprfx` is automatically generated if one is required. If successful, - // return true. Otherwise, return false. - // - // This helper uses two's complement equivalences, for example treating 0xffff - // as -1 for H-sized lanes. - bool TrySingleAddSub(AddSubHelperOption option, - const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm); - - void SVESdotUdotHelper(IntArithFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm); - - void SVESdotUdotIndexHelper(IntArithIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); - - // For noncommutative arithmetic operations. - void NoncommutativeArithmeticHelper(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - SVEArithPredicatedFn fn, - SVEArithPredicatedFn rev_fn); - - void FPCommutativeArithmeticHelper(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - SVEArithPredicatedFn fn, - FPMacroNaNPropagationOption nan_option); - - // Floating-point fused multiply-add vectors (predicated), writing addend. - typedef void (Assembler::*SVEMulAddPredicatedZdaFn)(const ZRegister& zda, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - // Floating-point fused multiply-add vectors (predicated), writing - // multiplicand. 
- typedef void (Assembler::*SVEMulAddPredicatedZdnFn)(const ZRegister& zdn, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm); - - void FPMulAddHelper(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - SVEMulAddPredicatedZdaFn fn_zda, - SVEMulAddPredicatedZdnFn fn_zdn, - FPMacroNaNPropagationOption nan_option); - - typedef void (Assembler::*SVEMulAddIndexFn)(const ZRegister& zda, - const ZRegister& zn, - const ZRegister& zm, - int index); - - void FPMulAddIndexHelper(SVEMulAddIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index); - // Tell whether any of the macro instruction can be used. When false the // MacroAssembler will assert if a method which can emit a variable number // of instructions is called. @@ -6935,8 +3727,7 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // Scratch registers available for use by the MacroAssembler. CPURegList tmp_list_; - CPURegList v_tmp_list_; - CPURegList p_tmp_list_; + CPURegList fptmp_list_; UseScratchRegisterScope* current_scratch_scope_; @@ -6946,8 +3737,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { ptrdiff_t checkpoint_; ptrdiff_t recommended_checkpoint_; - FPMacroNaNPropagationOption fp_nan_propagation_; - friend class Pool; friend class LiteralPool; }; @@ -7016,35 +3805,11 @@ class BlockPoolsScope { MacroAssembler* masm_; }; -MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, - const ZRegister& dst, - const ZRegister& src) - : ExactAssemblyScope(masm, - ShouldGenerateMovprfx(dst, src) - ? 
(2 * kInstructionSize) - : kInstructionSize) { - if (ShouldGenerateMovprfx(dst, src)) { - masm->movprfx(dst, src); - } -} - -MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, - const ZRegister& dst, - const PRegister& pg, - const ZRegister& src) - : ExactAssemblyScope(masm, - ShouldGenerateMovprfx(dst, pg, src) - ? (2 * kInstructionSize) - : kInstructionSize) { - if (ShouldGenerateMovprfx(dst, pg, src)) { - masm->movprfx(dst, pg, src); - } -} // This scope utility allows scratch registers to be managed safely. The -// MacroAssembler's GetScratch*RegisterList() are used as a pool of scratch -// registers. These registers can be allocated on demand, and will be returned -// at the end of the scope. +// MacroAssembler's GetScratchRegisterList() (and GetScratchVRegisterList()) is +// used as a pool of scratch registers. These registers can be allocated on +// demand, and will be returned at the end of the scope. // // When the scope ends, the MacroAssembler's lists will be restored to their // original state, even if the lists were modified by some other means. @@ -7054,22 +3819,14 @@ class UseScratchRegisterScope { // must not be `NULL`), so it is ready to use immediately after it has been // constructed. explicit UseScratchRegisterScope(MacroAssembler* masm) - : masm_(NULL), - parent_(NULL), - old_available_(0), - old_available_v_(0), - old_available_p_(0) { + : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) { Open(masm); } // This constructor does not implicitly initialise the scope. Instead, the // user is required to explicitly call the `Open` function before using the // scope. UseScratchRegisterScope() - : masm_(NULL), - parent_(NULL), - old_available_(0), - old_available_v_(0), - old_available_p_(0) {} + : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) {} // This function performs the actual initialisation work. 
void Open(MacroAssembler* masm); @@ -7084,43 +3841,26 @@ class UseScratchRegisterScope { bool IsAvailable(const CPURegister& reg) const; + // Take a register from the appropriate temps list. It will be returned // automatically when the scope ends. Register AcquireW() { - return AcquireFrom(masm_->GetScratchRegisterList()).W(); + return AcquireNextAvailable(masm_->GetScratchRegisterList()).W(); } Register AcquireX() { - return AcquireFrom(masm_->GetScratchRegisterList()).X(); + return AcquireNextAvailable(masm_->GetScratchRegisterList()).X(); } VRegister AcquireH() { - return AcquireFrom(masm_->GetScratchVRegisterList()).H(); + return AcquireNextAvailable(masm_->GetScratchVRegisterList()).H(); } VRegister AcquireS() { - return AcquireFrom(masm_->GetScratchVRegisterList()).S(); + return AcquireNextAvailable(masm_->GetScratchVRegisterList()).S(); } VRegister AcquireD() { - return AcquireFrom(masm_->GetScratchVRegisterList()).D(); - } - ZRegister AcquireZ() { - return AcquireFrom(masm_->GetScratchVRegisterList()).Z(); - } - PRegister AcquireP() { - // Prefer to allocate p8-p15 if we can, to leave p0-p7 available for use as - // governing predicates. - CPURegList* available = masm_->GetScratchPRegisterList(); - RegList preferred = ~kGoverningPRegisterMask; - if ((available->GetList() & preferred) != 0) { - return AcquireFrom(available, preferred).P(); - } - return AcquireFrom(available).P(); - } - // Acquire a P register suitable for use as a governing predicate in - // instructions which only accept p0-p7 for that purpose. 
- PRegister AcquireGoverningP() { - CPURegList* available = masm_->GetScratchPRegisterList(); - return AcquireFrom(available, kGoverningPRegisterMask).P(); + return AcquireNextAvailable(masm_->GetScratchVRegisterList()).D(); } + Register AcquireRegisterOfSize(int size_in_bits); Register AcquireSameSizeAs(const Register& reg) { return AcquireRegisterOfSize(reg.GetSizeInBits()); @@ -7135,12 +3875,6 @@ class UseScratchRegisterScope { : CPURegister(AcquireRegisterOfSize(size_in_bits)); } - // Acquire a register big enough to represent one lane of `vector`. - Register AcquireRegisterToHoldLane(const CPURegister& vector) { - VIXL_ASSERT(vector.GetLaneSizeInBits() <= kXRegSize); - return (vector.GetLaneSizeInBits() > kWRegSize) ? AcquireX() : AcquireW(); - } - // Explicitly release an acquired (or excluded) register, putting it back in // the appropriate temps list. @@ -7158,10 +3892,6 @@ class UseScratchRegisterScope { const VRegister& reg2 = NoVReg, const VRegister& reg3 = NoVReg, const VRegister& reg4 = NoVReg); - void Include(const CPURegister& reg1, - const CPURegister& reg2 = NoCPUReg, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg); // Make sure that the specified registers are not available in this scope. @@ -7181,39 +3911,20 @@ class UseScratchRegisterScope { const CPURegister& reg3 = NoCPUReg, const CPURegister& reg4 = NoCPUReg); - // Convenience for excluding registers that are part of Operands. This is - // useful for sequences like this: - // - // // Use 'rd' as a scratch, but only if it's not aliased by an input. - // temps.Include(rd); - // temps.Exclude(rn); - // temps.Exclude(operand); - // - // Otherwise, a conditional check is needed on the last 'Exclude'. - void Exclude(const Operand& operand) { - if (operand.IsShiftedRegister() || operand.IsExtendedRegister()) { - Exclude(operand.GetRegister()); - } else { - VIXL_ASSERT(operand.IsImmediate()); - } - } // Prevent any scratch registers from being used in this scope. 
void ExcludeAll(); private: - static CPURegister AcquireFrom(CPURegList* available, - RegList mask = ~static_cast<RegList>(0)); + static CPURegister AcquireNextAvailable(CPURegList* available); static void ReleaseByCode(CPURegList* available, int code); + static void ReleaseByRegList(CPURegList* available, RegList regs); - static void IncludeByRegList(CPURegList* available, RegList exclude); - static void ExcludeByRegList(CPURegList* available, RegList exclude); - CPURegList* GetAvailableListFor(CPURegister::RegisterBank bank); + static void IncludeByRegList(CPURegList* available, RegList exclude); - static const RegList kGoverningPRegisterMask = - (static_cast<RegList>(1) << kNumberOfGoverningPRegisters) - 1; + static void ExcludeByRegList(CPURegList* available, RegList exclude); // The MacroAssembler maintains a list of available scratch registers, and // also keeps track of the most recently-opened scope so that on destruction @@ -7223,8 +3934,7 @@ class UseScratchRegisterScope { // The state of the available lists at the start of this scope. RegList old_available_; // kRegister - RegList old_available_v_; // kVRegister / kZRegister - RegList old_available_p_; // kPRegister + RegList old_available_v_; // kVRegister // Disallow copy constructor and operator=. VIXL_NO_RETURN_IN_DEBUG_MODE UseScratchRegisterScope( @@ -7245,11 +3955,23 @@ class UseScratchRegisterScope { // features needs a corresponding macro instruction. class SimulationCPUFeaturesScope { public: - template <typename... T> - explicit SimulationCPUFeaturesScope(MacroAssembler* masm, T... features) - : masm_(masm), cpu_features_scope_(masm, features...) 
{ + explicit SimulationCPUFeaturesScope( + MacroAssembler* masm, + CPUFeatures::Feature feature0 = CPUFeatures::kNone, + CPUFeatures::Feature feature1 = CPUFeatures::kNone, + CPUFeatures::Feature feature2 = CPUFeatures::kNone, + CPUFeatures::Feature feature3 = CPUFeatures::kNone) + : masm_(masm), + cpu_features_scope_(masm, feature0, feature1, feature2, feature3) { + masm_->SaveSimulatorCPUFeatures(); + masm_->EnableSimulatorCPUFeatures( + CPUFeatures(feature0, feature1, feature2, feature3)); + } + + SimulationCPUFeaturesScope(MacroAssembler* masm, const CPUFeatures& other) + : masm_(masm), cpu_features_scope_(masm, other) { masm_->SaveSimulatorCPUFeatures(); - masm_->EnableSimulatorCPUFeatures(CPUFeatures(features...)); + masm_->EnableSimulatorCPUFeatures(other); } ~SimulationCPUFeaturesScope() { masm_->RestoreSimulatorCPUFeatures(); } diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc deleted file mode 100644 index b107f132..00000000 --- a/src/aarch64/macro-assembler-sve-aarch64.cc +++ /dev/null @@ -1,2027 +0,0 @@ -// Copyright 2019, VIXL authors -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of ARM Limited nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "macro-assembler-aarch64.h" - -namespace vixl { -namespace aarch64 { - -void MacroAssembler::AddSubHelper(AddSubHelperOption option, - const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(imm.FitsInLane(zd)); - - // Simple, encodable cases. - if (TrySingleAddSub(option, zd, zn, imm)) return; - - VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate)); - bool add_imm = (option == kAddImmediate); - - // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one - // instruction. Also interpret the immediate as signed, so we can convert - // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc. - IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits())); - if (signed_imm.IsNegative()) { - AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate; - IntegerOperand n_imm(signed_imm.GetMagnitude()); - // IntegerOperand can represent -INT_MIN, so this is always safe. - VIXL_ASSERT(n_imm.IsPositiveOrZero()); - if (TrySingleAddSub(n_option, zd, zn, n_imm)) return; - } - - // Otherwise, fall back to dup + ADD_z_z/SUB_z_z. 
- UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); - Dup(scratch, imm); - - SingleEmissionCheckScope guard(this); - if (add_imm) { - add(zd, zn, scratch); - } else { - sub(zd, zn, scratch); - } -} - -bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option, - const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(imm.FitsInLane(zd)); - - int imm8; - int shift = -1; - if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || - imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { - MovprfxHelperScope guard(this, zd, zn); - switch (option) { - case kAddImmediate: - add(zd, zd, imm8, shift); - return true; - case kSubImmediate: - sub(zd, zd, imm8, shift); - return true; - } - } - return false; -} - -void MacroAssembler::IntWideImmHelper(IntWideImmFn imm_fn, - SVEArithPredicatedFn reg_macro, - const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm, - bool is_signed) { - if (is_signed) { - // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi - if (imm.IsInt8()) { - MovprfxHelperScope guard(this, zd, zn); - (this->*imm_fn)(zd, zd, imm.AsInt8()); - return; - } - } else { - // E.g. UMIN_z_zi, UMAX_z_zi - if (imm.IsUint8()) { - MovprfxHelperScope guard(this, zd, zn); - (this->*imm_fn)(zd, zd, imm.AsUint8()); - return; - } - } - - UseScratchRegisterScope temps(this); - PRegister pg = temps.AcquireGoverningP(); - Ptrue(pg.WithSameLaneSizeAs(zd)); - - // Try to re-use zd if we can, so we can avoid a movprfx. - ZRegister scratch = - zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()) - : zd; - Dup(scratch, imm); - - // The vector-form macro for commutative operations will swap the arguments to - // avoid movprfx, if necessary. 
- (this->*reg_macro)(zd, pg.Merging(), zn, scratch); -} - -void MacroAssembler::Mul(const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - IntWideImmFn imm_fn = &Assembler::mul; - SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul; - IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); -} - -void MacroAssembler::Smin(const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.FitsInSignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::smin; - SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin; - IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); -} - -void MacroAssembler::Smax(const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.FitsInSignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::smax; - SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax; - IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); -} - -void MacroAssembler::Umax(const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::umax; - SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax; - IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); -} - -void MacroAssembler::Umin(const ZRegister& zd, - const ZRegister& zn, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); - IntWideImmFn imm_fn = &Assembler::umin; - SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin; - IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); -} - -void MacroAssembler::Addpl(const Register& xd, - const Register& xn, - int64_t multiplier) { - VIXL_ASSERT(allow_macro_instructions_); - - // This macro relies on `Rdvl` to handle some out-of-range cases. Check that - // `VL * multiplier` cannot overflow, for any possible value of VL. 
- VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); - VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); - - if (xd.IsZero()) return; - if (xn.IsZero() && xd.IsSP()) { - // TODO: This operation doesn't make much sense, but we could support it - // with a scratch register if necessary. - VIXL_UNIMPLEMENTED(); - } - - // Handling xzr requires an extra move, so defer it until later so we can try - // to use `rdvl` instead (via `Addvl`). - if (IsInt6(multiplier) && !xn.IsZero()) { - SingleEmissionCheckScope guard(this); - addpl(xd, xn, static_cast<int>(multiplier)); - return; - } - - // If `multiplier` is a multiple of 8, we can use `Addvl` instead. - if ((multiplier % kZRegBitsPerPRegBit) == 0) { - Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit); - return; - } - - if (IsInt6(multiplier)) { - VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`. - // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so - // materialise a zero. - MacroEmissionCheckScope guard(this); - movz(xd, 0); - addpl(xd, xd, static_cast<int>(multiplier)); - return; - } - - // TODO: Some probable cases result in rather long sequences. For example, - // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just - // outside the encodable range. We should look for ways to cover such cases - // without drastically increasing the complexity of this logic. - - // For other cases, calculate xn + (PL * multiplier) using discrete - // instructions. This requires two scratch registers in the general case, so - // try to re-use the destination as a scratch register. - UseScratchRegisterScope temps(this); - temps.Include(xd); - temps.Exclude(xn); - - Register scratch = temps.AcquireX(); - // Because there is no `rdpl`, so we have to calculate PL from VL. We can't - // scale the multiplier because (we already know) it isn't a multiple of 8. 
- Rdvl(scratch, multiplier); - - MacroEmissionCheckScope guard(this); - if (xn.IsZero()) { - asr(xd, scratch, kZRegBitsPerPRegBitLog2); - } else if (xd.IsSP() || xn.IsSP()) { - // TODO: MacroAssembler::Add should be able to handle this. - asr(scratch, scratch, kZRegBitsPerPRegBitLog2); - add(xd, xn, scratch); - } else { - add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2)); - } -} - -void MacroAssembler::Addvl(const Register& xd, - const Register& xn, - int64_t multiplier) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(xd.IsX()); - VIXL_ASSERT(xn.IsX()); - - // Check that `VL * multiplier` cannot overflow, for any possible value of VL. - VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); - VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); - - if (xd.IsZero()) return; - if (xn.IsZero() && xd.IsSP()) { - // TODO: This operation doesn't make much sense, but we could support it - // with a scratch register if necessary. `rdvl` cannot write into `sp`. - VIXL_UNIMPLEMENTED(); - } - - if (IsInt6(multiplier)) { - SingleEmissionCheckScope guard(this); - if (xn.IsZero()) { - rdvl(xd, static_cast<int>(multiplier)); - } else { - addvl(xd, xn, static_cast<int>(multiplier)); - } - return; - } - - // TODO: Some probable cases result in rather long sequences. For example, - // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just - // outside the encodable range. We should look for ways to cover such cases - // without drastically increasing the complexity of this logic. - - // For other cases, calculate xn + (VL * multiplier) using discrete - // instructions. This requires two scratch registers in the general case, so - // we try to re-use the destination as a scratch register. 
- UseScratchRegisterScope temps(this); - temps.Include(xd); - temps.Exclude(xn); - - Register a = temps.AcquireX(); - Mov(a, multiplier); - - MacroEmissionCheckScope guard(this); - Register b = temps.AcquireX(); - rdvl(b, 1); - if (xn.IsZero()) { - mul(xd, a, b); - } else if (xd.IsSP() || xn.IsSP()) { - mul(a, a, b); - add(xd, xn, a); - } else { - madd(xd, a, b, xn); - } -} - -void MacroAssembler::CalculateSVEAddress(const Register& xd, - const SVEMemOperand& addr, - int vl_divisor_log2) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(!addr.IsScatterGather()); - VIXL_ASSERT(xd.IsX()); - - // The lower bound is where a whole Z register is accessed. - VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0)); - // The upper bound is for P register accesses, and for instructions like - // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane. - VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2)); - - SVEOffsetModifier mod = addr.GetOffsetModifier(); - Register base = addr.GetScalarBase(); - - if (addr.IsEquivalentToScalar()) { - // For example: - // [x0] - // [x0, #0] - // [x0, xzr, LSL 2] - Mov(xd, base); - } else if (addr.IsScalarPlusImmediate()) { - // For example: - // [x0, #42] - // [x0, #42, MUL VL] - int64_t offset = addr.GetImmediateOffset(); - VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar. - if (addr.IsMulVl()) { - int vl_divisor = 1 << vl_divisor_log2; - // For all possible values of vl_divisor, we can simply use `Addpl`. This - // will select `addvl` if necessary. - VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0); - Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor)); - } else { - // IsScalarPlusImmediate() ensures that no other modifiers can occur. 
- VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); - Add(xd, base, offset); - } - } else if (addr.IsScalarPlusScalar()) { - // For example: - // [x0, x1] - // [x0, x1, LSL #4] - Register offset = addr.GetScalarOffset(); - VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar. - if (mod == SVE_LSL) { - Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount())); - } else { - // IsScalarPlusScalar() ensures that no other modifiers can occur. - VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); - Add(xd, base, offset); - } - } else { - // All other forms are scatter-gather addresses, which cannot be evaluated - // into an X register. - VIXL_UNREACHABLE(); - } -} - -void MacroAssembler::Cpy(const ZRegister& zd, - const PRegister& pg, - IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.FitsInLane(zd)); - int imm8; - int shift; - if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || - imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { - SingleEmissionCheckScope guard(this); - cpy(zd, pg, imm8, shift); - return; - } - - // The fallbacks rely on `cpy` variants that only support merging predication. - // If zeroing predication was requested, zero the destination first. - if (pg.IsZeroing()) { - SingleEmissionCheckScope guard(this); - dup(zd, 0); - } - PRegisterM pg_m = pg.Merging(); - - // Try to encode the immediate using fcpy. - VIXL_ASSERT(imm.FitsInLane(zd)); - if (zd.GetLaneSizeInBits() >= kHRegSize) { - double fp_imm = 0.0; - switch (zd.GetLaneSizeInBits()) { - case kHRegSize: - fp_imm = - FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN); - break; - case kSRegSize: - fp_imm = RawbitsToFloat(imm.AsUint32()); - break; - case kDRegSize: - fp_imm = RawbitsToDouble(imm.AsUint64()); - break; - default: - VIXL_UNREACHABLE(); - break; - } - // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so - // we can use IsImmFP64 for all lane sizes. 
- if (IsImmFP64(fp_imm)) { - SingleEmissionCheckScope guard(this); - fcpy(zd, pg_m, fp_imm); - return; - } - } - - // Fall back to using a scratch register. - UseScratchRegisterScope temps(this); - Register scratch = temps.AcquireRegisterToHoldLane(zd); - Mov(scratch, imm); - - SingleEmissionCheckScope guard(this); - cpy(zd, pg_m, scratch); -} - -// TODO: We implement Fcpy (amongst other things) for all FP types because it -// allows us to preserve user-specified NaNs. We should come up with some -// FPImmediate type to abstract this, and avoid all the duplication below (and -// elsewhere). - -void MacroAssembler::Fcpy(const ZRegister& zd, - const PRegisterM& pg, - double imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(pg.IsMerging()); - - if (IsImmFP64(imm)) { - SingleEmissionCheckScope guard(this); - fcpy(zd, pg, imm); - return; - } - - // As a fall-back, cast the immediate to the required lane size, and try to - // encode the bit pattern using `Cpy`. - Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); -} - -void MacroAssembler::Fcpy(const ZRegister& zd, - const PRegisterM& pg, - float imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(pg.IsMerging()); - - if (IsImmFP32(imm)) { - SingleEmissionCheckScope guard(this); - fcpy(zd, pg, imm); - return; - } - - // As a fall-back, cast the immediate to the required lane size, and try to - // encode the bit pattern using `Cpy`. - Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); -} - -void MacroAssembler::Fcpy(const ZRegister& zd, - const PRegisterM& pg, - Float16 imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(pg.IsMerging()); - - if (IsImmFP16(imm)) { - SingleEmissionCheckScope guard(this); - fcpy(zd, pg, imm); - return; - } - - // As a fall-back, cast the immediate to the required lane size, and try to - // encode the bit pattern using `Cpy`. 
- Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); -} - -void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.FitsInLane(zd)); - unsigned lane_size = zd.GetLaneSizeInBits(); - int imm8; - int shift; - if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || - imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { - SingleEmissionCheckScope guard(this); - dup(zd, imm8, shift); - } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) { - SingleEmissionCheckScope guard(this); - dupm(zd, imm.AsUintN(lane_size)); - } else { - UseScratchRegisterScope temps(this); - Register scratch = temps.AcquireRegisterToHoldLane(zd); - Mov(scratch, imm); - - SingleEmissionCheckScope guard(this); - dup(zd, scratch); - } -} - -void MacroAssembler::NoncommutativeArithmeticHelper( - const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - SVEArithPredicatedFn fn, - SVEArithPredicatedFn rev_fn) { - if (zd.Aliases(zn)) { - // E.g. zd = zd / zm - SingleEmissionCheckScope guard(this); - (this->*fn)(zd, pg, zn, zm); - } else if (zd.Aliases(zm)) { - // E.g. zd = zn / zd - SingleEmissionCheckScope guard(this); - (this->*rev_fn)(zd, pg, zm, zn); - } else { - // E.g. zd = zn / zm - MovprfxHelperScope guard(this, zd, pg, zn); - (this->*fn)(zd, pg, zd, zm); - } -} - -void MacroAssembler::FPCommutativeArithmeticHelper( - const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - SVEArithPredicatedFn fn, - FPMacroNaNPropagationOption nan_option) { - ResolveFPNaNPropagationOption(&nan_option); - - if (zd.Aliases(zn)) { - SingleEmissionCheckScope guard(this); - (this->*fn)(zd, pg, zd, zm); - } else if (zd.Aliases(zm)) { - switch (nan_option) { - case FastNaNPropagation: { - // Swap the arguments. 
- SingleEmissionCheckScope guard(this); - (this->*fn)(zd, pg, zd, zn); - return; - } - case StrictNaNPropagation: { - UseScratchRegisterScope temps(this); - // Use a scratch register to keep the argument order exactly as - // specified. - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); - { - MovprfxHelperScope guard(this, scratch, pg, zn); - (this->*fn)(scratch, pg, scratch, zm); - } - Mov(zd, scratch); - return; - } - case NoFPMacroNaNPropagationSelected: - VIXL_UNREACHABLE(); - return; - } - } else { - MovprfxHelperScope guard(this, zd, pg, zn); - (this->*fn)(zd, pg, zd, zm); - } -} - -void MacroAssembler::Asr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::asr), - static_cast<SVEArithPredicatedFn>( - &Assembler::asrr)); -} - -void MacroAssembler::Lsl(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::lsl), - static_cast<SVEArithPredicatedFn>( - &Assembler::lslr)); -} - -void MacroAssembler::Lsr(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::lsr), - static_cast<SVEArithPredicatedFn>( - &Assembler::lsrr)); -} - -void MacroAssembler::Fdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fdiv), - static_cast<SVEArithPredicatedFn>( - &Assembler::fdivr)); -} - -void MacroAssembler::Fsub(const 
ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fsub), - static_cast<SVEArithPredicatedFn>( - &Assembler::fsubr)); -} - -void MacroAssembler::Fadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fadd), - nan_option); -} - -void MacroAssembler::Fabd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fabd), - nan_option); -} - -void MacroAssembler::Fmul(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fmul), - nan_option); -} - -void MacroAssembler::Fmulx(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fmulx), - nan_option); -} - -void MacroAssembler::Fmax(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fmax), - nan_option); -} - -void 
MacroAssembler::Fmin(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fmin), - nan_option); -} - -void MacroAssembler::Fmaxnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fmaxnm), - nan_option); -} - -void MacroAssembler::Fminnm(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPCommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::fminnm), - nan_option); -} - -void MacroAssembler::Fdup(const ZRegister& zd, double imm) { - VIXL_ASSERT(allow_macro_instructions_); - - switch (zd.GetLaneSizeInBits()) { - case kHRegSize: - Fdup(zd, Float16(imm)); - break; - case kSRegSize: - Fdup(zd, static_cast<float>(imm)); - break; - case kDRegSize: - if (IsImmFP64(imm)) { - SingleEmissionCheckScope guard(this); - fdup(zd, imm); - } else { - Dup(zd, DoubleToRawbits(imm)); - } - break; - } -} - -void MacroAssembler::Fdup(const ZRegister& zd, float imm) { - VIXL_ASSERT(allow_macro_instructions_); - - switch (zd.GetLaneSizeInBits()) { - case kHRegSize: - Fdup(zd, Float16(imm)); - break; - case kSRegSize: - if (IsImmFP32(imm)) { - SingleEmissionCheckScope guard(this); - fdup(zd, imm); - } else { - Dup(zd, FloatToRawbits(imm)); - } - break; - case kDRegSize: - Fdup(zd, static_cast<double>(imm)); - break; - } -} - -void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) { - VIXL_ASSERT(allow_macro_instructions_); - - switch (zd.GetLaneSizeInBits()) { 
- case kHRegSize: - if (IsImmFP16(imm)) { - SingleEmissionCheckScope guard(this); - fdup(zd, imm); - } else { - Dup(zd, Float16ToRawbits(imm)); - } - break; - case kSRegSize: - Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN)); - break; - case kDRegSize: - Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); - break; - } -} - -void MacroAssembler::Index(const ZRegister& zd, - const Operand& start, - const Operand& step) { - class IndexOperand : public Operand { - public: - static IndexOperand Prepare(MacroAssembler* masm, - UseScratchRegisterScope* temps, - const Operand& op, - const ZRegister& zd) { - // Look for encodable immediates. - int imm; - if (op.IsImmediate()) { - if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd, &imm)) { - return IndexOperand(imm); - } - Register scratch = temps->AcquireRegisterToHoldLane(zd); - masm->Mov(scratch, op); - return IndexOperand(scratch); - } else { - // Plain registers can be encoded directly. - VIXL_ASSERT(op.IsPlainRegister()); - return IndexOperand(op.GetRegister()); - } - } - - int GetImm5() const { - int64_t imm = GetImmediate(); - VIXL_ASSERT(IsInt5(imm)); - return static_cast<int>(imm); - } - - private: - explicit IndexOperand(const Register& reg) : Operand(reg) {} - explicit IndexOperand(int64_t imm) : Operand(imm) {} - }; - - UseScratchRegisterScope temps(this); - IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd); - IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd); - - SingleEmissionCheckScope guard(this); - if (start_enc.IsImmediate()) { - if (step_enc.IsImmediate()) { - index(zd, start_enc.GetImm5(), step_enc.GetImm5()); - } else { - index(zd, start_enc.GetImm5(), step_enc.GetRegister()); - } - } else { - if (step_enc.IsImmediate()) { - index(zd, start_enc.GetRegister(), step_enc.GetImm5()); - } else { - index(zd, start_enc.GetRegister(), step_enc.GetRegister()); - } - } -} - -void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) { - 
VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(imm.FitsInLane(zdn)); - - if (imm.IsZero()) { - SingleEmissionCheckScope guard(this); - insr(zdn, xzr); - return; - } - - UseScratchRegisterScope temps(this); - Register scratch = temps.AcquireRegisterToHoldLane(zdn); - - // TODO: There are many cases where we could optimise immediates, such as by - // detecting repeating patterns or FP immediates. We should optimise and - // abstract this for use in other SVE mov-immediate-like macros. - Mov(scratch, imm); - - SingleEmissionCheckScope guard(this); - insr(zdn, scratch); -} - -void MacroAssembler::Mla(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(za)) { - // zda = zda + (zn * zm) - SingleEmissionCheckScope guard(this); - mla(zd, pg, zn, zm); - } else if (zd.Aliases(zn)) { - // zdn = za + (zdn * zm) - SingleEmissionCheckScope guard(this); - mad(zd, pg, zm, za); - } else if (zd.Aliases(zm)) { - // Multiplication is commutative, so we can swap zn and zm. - // zdm = za + (zdm * zn) - SingleEmissionCheckScope guard(this); - mad(zd, pg, zn, za); - } else { - // zd = za + (zn * zm) - ExactAssemblyScope guard(this, 2 * kInstructionSize); - movprfx(zd, pg, za); - mla(zd, pg, zn, zm); - } -} - -void MacroAssembler::Mls(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(za)) { - // zda = zda - (zn * zm) - SingleEmissionCheckScope guard(this); - mls(zd, pg, zn, zm); - } else if (zd.Aliases(zn)) { - // zdn = za - (zdn * zm) - SingleEmissionCheckScope guard(this); - msb(zd, pg, zm, za); - } else if (zd.Aliases(zm)) { - // Multiplication is commutative, so we can swap zn and zm. 
- // zdm = za - (zdm * zn) - SingleEmissionCheckScope guard(this); - msb(zd, pg, zn, za); - } else { - // zd = za - (zn * zm) - ExactAssemblyScope guard(this, 2 * kInstructionSize); - movprfx(zd, pg, za); - mls(zd, pg, zn, zm); - } -} - -void MacroAssembler::CompareHelper(Condition cond, - const PRegisterWithLaneSize& pd, - const PRegisterZ& pg, - const ZRegister& zn, - IntegerOperand imm) { - UseScratchRegisterScope temps(this); - ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); - Dup(zm, imm); - SingleEmissionCheckScope guard(this); - cmp(cond, pd, pg, zn, zm); -} - -void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(pd.IsLaneSizeB()); - VIXL_ASSERT(pn.IsLaneSizeB()); - if (pd.Is(pn)) { - SingleEmissionCheckScope guard(this); - pfirst(pd, pg, pn); - } else { - UseScratchRegisterScope temps(this); - PRegister temp_pg = pg; - if (pd.Aliases(pg)) { - temp_pg = temps.AcquireP(); - Mov(temp_pg.VnB(), pg.VnB()); - } - Mov(pd, pn); - SingleEmissionCheckScope guard(this); - pfirst(pd, temp_pg, pd); - } -} - -void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd, - const PRegister& pg, - const PRegisterWithLaneSize& pn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(AreSameFormat(pd, pn)); - if (pd.Is(pn)) { - SingleEmissionCheckScope guard(this); - pnext(pd, pg, pn); - } else { - UseScratchRegisterScope temps(this); - PRegister temp_pg = pg; - if (pd.Aliases(pg)) { - temp_pg = temps.AcquireP(); - Mov(temp_pg.VnB(), pg.VnB()); - } - Mov(pd.VnB(), pn.VnB()); - SingleEmissionCheckScope guard(this); - pnext(pd, temp_pg, pd); - } -} - -void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd, - SVEPredicateConstraint pattern, - FlagsUpdate s) { - VIXL_ASSERT(allow_macro_instructions_); - switch (s) { - case LeaveFlags: - Ptrue(pd, pattern); - return; - case SetFlags: - Ptrues(pd, pattern); - return; - } - 
VIXL_UNREACHABLE(); -} - -void MacroAssembler::Sdiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::sdiv), - static_cast<SVEArithPredicatedFn>( - &Assembler::sdivr)); -} - -void MacroAssembler::Sub(const ZRegister& zd, - IntegerOperand imm, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - - int imm8; - int shift = -1; - if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || - imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { - MovprfxHelperScope guard(this, zd, zm); - subr(zd, zd, imm8, shift); - } else { - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits()); - Dup(scratch, imm); - - SingleEmissionCheckScope guard(this); - sub(zd, scratch, zm); - } -} - -void MacroAssembler::Sub(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::sub), - static_cast<SVEArithPredicatedFn>( - &Assembler::subr)); -} - -void MacroAssembler::Udiv(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - NoncommutativeArithmeticHelper(zd, - pg, - zn, - zm, - static_cast<SVEArithPredicatedFn>( - &Assembler::udiv), - static_cast<SVEArithPredicatedFn>( - &Assembler::udivr)); -} - -void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - SVELoadBroadcastFn fn, - int divisor) { - VIXL_ASSERT(addr.IsScalarPlusImmediate()); - int64_t imm = addr.GetImmediateOffset(); - if ((imm % divisor == 0) && IsUint6(imm / divisor)) { - SingleEmissionCheckScope guard(this); - 
(this->*fn)(zt, pg, addr); - } else { - UseScratchRegisterScope temps(this); - Register scratch = temps.AcquireX(); - CalculateSVEAddress(scratch, addr, zt); - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, SVEMemOperand(scratch)); - } -} - -void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt, - const SVEMemOperand& addr, - SVELoadStoreFn fn) { - VIXL_ASSERT(allow_macro_instructions_); - VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister()); - - if (addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) && - addr.IsMulVl())) { - SingleEmissionCheckScope guard(this); - (this->*fn)(rt, addr); - return; - } - - if (addr.IsEquivalentToScalar()) { - SingleEmissionCheckScope guard(this); - (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase())); - return; - } - - UseScratchRegisterScope temps(this); - Register scratch = temps.AcquireX(); - CalculateSVEAddress(scratch, addr, rt); - SingleEmissionCheckScope guard(this); - (this->*fn)(rt, SVEMemOperand(scratch)); -} - -template <typename Tg, typename Tf> -void MacroAssembler::SVELoadStoreScalarImmHelper( - const ZRegister& zt, - const Tg& pg, - const SVEMemOperand& addr, - Tf fn, - int imm_bits, - int shift_amount, - SVEOffsetModifier supported_modifier, - int vl_divisor_log2) { - VIXL_ASSERT(allow_macro_instructions_); - int imm_divisor = 1 << shift_amount; - - if (addr.IsPlainScalar() || - (addr.IsScalarPlusImmediate() && - IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) && - ((addr.GetImmediateOffset() % imm_divisor) == 0) && - (addr.GetOffsetModifier() == supported_modifier))) { - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, addr); - return; - } - - if (addr.IsEquivalentToScalar()) { - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); - return; - } - - if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) && - (vl_divisor_log2 == -1)) { - // We don't handle [x0, #imm, MUL VL] if the 
in-memory access size is not VL - // dependent. - VIXL_UNIMPLEMENTED(); - } - - UseScratchRegisterScope temps(this); - Register scratch = temps.AcquireX(); - CalculateSVEAddress(scratch, addr, vl_divisor_log2); - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, SVEMemOperand(scratch)); -} - -template <typename Tg, typename Tf> -void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2, - const ZRegister& zt, - const Tg& pg, - const SVEMemOperand& addr, - Tf fn) { - if (addr.IsPlainScalar() || - (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && - addr.IsEquivalentToLSL(msize_in_bytes_log2)) || - (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) && - addr.IsMulVl())) { - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, addr); - return; - } - - if (addr.IsEquivalentToScalar()) { - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); - return; - } - - if (addr.IsVectorPlusImmediate()) { - uint64_t offset = addr.GetImmediateOffset(); - if (IsMultiple(offset, (1 << msize_in_bytes_log2)) && - IsUint5(offset >> msize_in_bytes_log2)) { - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, addr); - return; - } - } - - if (addr.IsScalarPlusVector()) { - VIXL_ASSERT(addr.IsScatterGather()); - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, addr); - return; - } - - UseScratchRegisterScope temps(this); - if (addr.IsScatterGather()) { - // In scatter-gather modes, zt and zn/zm have the same lane size. However, - // for 32-bit accesses, the result of each lane's address calculation still - // requires 64 bits; we can't naively use `Adr` for the address calculation - // because it would truncate each address to 32 bits. - - if (addr.IsVectorPlusImmediate()) { - // Synthesise the immediate in an X register, then use a - // scalar-plus-vector access with the original vector. 
- Register scratch = temps.AcquireX(); - Mov(scratch, addr.GetImmediateOffset()); - SingleEmissionCheckScope guard(this); - SVEOffsetModifier om = - zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER; - (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om)); - return; - } - - VIXL_UNIMPLEMENTED(); - } else { - Register scratch = temps.AcquireX(); - // TODO: If we have an immediate offset that is a multiple of - // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to - // save an instruction. - int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2; - CalculateSVEAddress(scratch, addr, vl_divisor_log2); - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, SVEMemOperand(scratch)); - } -} - -template <typename Tf> -void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2, - const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr, - Tf fn) { - if (addr.IsScatterGather()) { - // Scatter-gather first-fault loads share encodings with normal loads. - SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn); - return; - } - - // Contiguous first-faulting loads have no scalar-plus-immediate form at all, - // so we don't do immediate synthesis. - - // We cannot currently distinguish "[x0]" from "[x0, #0]", and this - // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here. 
- if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() && - addr.IsEquivalentToLSL(msize_in_bytes_log2))) { - SingleEmissionCheckScope guard(this); - (this->*fn)(zt, pg, addr); - return; - } - - VIXL_UNIMPLEMENTED(); -} - -void MacroAssembler::Ld1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kBRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ld1b)); -} - -void MacroAssembler::Ld1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kHRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ld1h)); -} - -void MacroAssembler::Ld1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kWRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ld1w)); -} - -void MacroAssembler::Ld1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kDRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ld1d)); -} - -void MacroAssembler::Ld1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kBRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ld1sb)); -} - -void MacroAssembler::Ld1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kHRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ld1sh)); -} - -void MacroAssembler::Ld1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kSRegSizeInBytesLog2, - zt, - pg, - addr, - 
static_cast<SVELoad1Fn>(&Assembler::ld1sw)); -} - -void MacroAssembler::St1b(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kBRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVEStore1Fn>(&Assembler::st1b)); -} - -void MacroAssembler::St1h(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kHRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVEStore1Fn>(&Assembler::st1h)); -} - -void MacroAssembler::St1w(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kSRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVEStore1Fn>(&Assembler::st1w)); -} - -void MacroAssembler::St1d(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStore1Helper(kDRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVEStore1Fn>(&Assembler::st1d)); -} - -void MacroAssembler::Ldff1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadFFHelper(kBRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ldff1b)); -} - -void MacroAssembler::Ldff1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadFFHelper(kHRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ldff1h)); -} - -void MacroAssembler::Ldff1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadFFHelper(kSRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ldff1w)); -} - -void MacroAssembler::Ldff1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - 
VIXL_ASSERT(allow_macro_instructions_); - SVELoadFFHelper(kDRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ldff1d)); -} - -void MacroAssembler::Ldff1sb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadFFHelper(kBRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ldff1sb)); -} - -void MacroAssembler::Ldff1sh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadFFHelper(kHRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ldff1sh)); -} - -void MacroAssembler::Ldff1sw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadFFHelper(kSRegSizeInBytesLog2, - zt, - pg, - addr, - static_cast<SVELoad1Fn>(&Assembler::ldff1sw)); -} - -void MacroAssembler::Ld1rqb(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqb, - 4, - 4, - NO_SVE_OFFSET_MODIFIER, - -1); -} - -void MacroAssembler::Ld1rqd(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqd, - 4, - 4, - NO_SVE_OFFSET_MODIFIER, - -1); -} - -void MacroAssembler::Ld1rqh(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqh, - 4, - 4, - NO_SVE_OFFSET_MODIFIER, - -1); -} - -void MacroAssembler::Ld1rqw(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ld1rqw, - 4, - 4, - 
NO_SVE_OFFSET_MODIFIER, - -1); -} - -void MacroAssembler::Ldnt1b(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1b, - 4, - 0, - SVE_MUL_VL); -} - -void MacroAssembler::Ldnt1d(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1d, - 4, - 0, - SVE_MUL_VL); -} - -void MacroAssembler::Ldnt1h(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1h, - 4, - 0, - SVE_MUL_VL); -} - -void MacroAssembler::Ldnt1w(const ZRegister& zt, - const PRegisterZ& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::ldnt1w, - 4, - 0, - SVE_MUL_VL); -} - -void MacroAssembler::Stnt1b(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::stnt1b, - 4, - 0, - SVE_MUL_VL); -} -void MacroAssembler::Stnt1d(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::stnt1d, - 4, - 0, - SVE_MUL_VL); -} -void MacroAssembler::Stnt1h(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - &MacroAssembler::stnt1h, - 4, - 0, - SVE_MUL_VL); -} -void MacroAssembler::Stnt1w(const ZRegister& zt, - const PRegister& pg, - const SVEMemOperand& addr) { - VIXL_ASSERT(allow_macro_instructions_); - SVELoadStoreScalarImmHelper(zt, - pg, - addr, - 
&MacroAssembler::stnt1w, - 4, - 0, - SVE_MUL_VL); -} - -void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - if (zd.Aliases(za)) { - // zda = zda + (zn . zm) - SingleEmissionCheckScope guard(this); - (this->*fn)(zd, zn, zm, index); - - } else if (zd.Aliases(zn) || zd.Aliases(zm)) { - // zdn = za + (zdn . zm[index]) - // zdm = za + (zn . zdm[index]) - // zdnm = za + (zdnm . zdnm[index]) - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, za); - (this->*fn)(scratch, zn, zm, index); - } - - Mov(zd, scratch); - } else { - // zd = za + (zn . zm) - MovprfxHelperScope guard(this, zd, za); - (this->*fn)(zd, zn, zm, index); - } -} - -void MacroAssembler::SVESdotUdotHelper(IntArithFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm) { - if (zd.Aliases(za)) { - // zda = zda + (zn . zm) - SingleEmissionCheckScope guard(this); - (this->*fn)(zd, zn, zm); - - } else if (zd.Aliases(zn) || zd.Aliases(zm)) { - // zdn = za + (zdn . zm) - // zdm = za + (zn . zdm) - // zdnm = za + (zdnm . zdnm) - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, za); - (this->*fn)(scratch, zn, zm); - } - - Mov(zd, scratch); - } else { - // zd = za + (zn . 
zm) - MovprfxHelperScope guard(this, zd, za); - (this->*fn)(zd, zn, zm); - } -} - -void MacroAssembler::Fscale(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); - Mov(scratch, zm); - MovprfxHelperScope guard(this, zd, pg, zn); - fscale(zd, pg, zd, scratch); - } else { - MovprfxHelperScope guard(this, zd, pg, zn); - fscale(zd, pg, zd, zm); - } -} - -void MacroAssembler::Sdot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotHelper(&Assembler::sdot, zd, za, zn, zm); -} - -void MacroAssembler::Sdot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); -} - -void MacroAssembler::Udot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotHelper(&Assembler::udot, zd, za, zn, zm); -} - -void MacroAssembler::Udot(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(allow_macro_instructions_); - SVESdotUdotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); -} - -void MacroAssembler::FPMulAddHelper(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - SVEMulAddPredicatedZdaFn fn_zda, - SVEMulAddPredicatedZdnFn fn_zdn, - FPMacroNaNPropagationOption nan_option) { - ResolveFPNaNPropagationOption(&nan_option); - - if (zd.Aliases(za)) { - // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 
- SingleEmissionCheckScope guard(this); - (this->*fn_zda)(zd, pg, zn, zm); - } else if (zd.Aliases(zn)) { - // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb. - SingleEmissionCheckScope guard(this); - (this->*fn_zdn)(zd, pg, zm, za); - } else if (zd.Aliases(zm)) { - switch (nan_option) { - case FastNaNPropagation: { - // We treat multiplication as commutative in the fast mode, so we can - // swap zn and zm. - // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb. - SingleEmissionCheckScope guard(this); - (this->*fn_zdn)(zd, pg, zn, za); - return; - } - case StrictNaNPropagation: { - UseScratchRegisterScope temps(this); - // Use a scratch register to keep the argument order exactly as - // specified. - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); - { - MovprfxHelperScope guard(this, scratch, pg, za); - // scratch = (-)za + ((-)zn * zm) - (this->*fn_zda)(scratch, pg, zn, zm); - } - Mov(zd, scratch); - return; - } - case NoFPMacroNaNPropagationSelected: - VIXL_UNREACHABLE(); - return; - } - } else { - // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 
- MovprfxHelperScope guard(this, zd, pg, za); - (this->*fn_zda)(zd, pg, zn, zm); - } -} - -void MacroAssembler::FPMulAddIndexHelper(SVEMulAddIndexFn fn, - const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - if (zd.Aliases(za)) { - // zda = zda + (zn * zm[i]) - SingleEmissionCheckScope guard(this); - (this->*fn)(zd, zn, zm, index); - - } else if (zd.Aliases(zn) || zd.Aliases(zm)) { - // zdn = za + (zdn * zm[i]) - // zdm = za + (zn * zdm[i]) - // zdnm = za + (zdnm * zdnm[i]) - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, za); - (this->*fn)(scratch, zn, zm, index); - } - Mov(zd, scratch); - } else { - // zd = za + (zn * zm[i]) - MovprfxHelperScope guard(this, zd, za); - (this->*fn)(zd, zn, zm, index); - } -} - -void MacroAssembler::Fmla(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddHelper(zd, - pg, - za, - zn, - zm, - &Assembler::fmla, - &Assembler::fmad, - nan_option); -} - -void MacroAssembler::Fmla(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddIndexHelper(&Assembler::fmla, zd, za, zn, zm, index); -} - -void MacroAssembler::Fmls(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddHelper(zd, - pg, - za, - zn, - zm, - &Assembler::fmls, - &Assembler::fmsb, - nan_option); -} - -void MacroAssembler::Fmls(const ZRegister& zd, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - int index) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddIndexHelper(&Assembler::fmls, zd, 
za, zn, zm, index); -} - -void MacroAssembler::Fnmla(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddHelper(zd, - pg, - za, - zn, - zm, - &Assembler::fnmla, - &Assembler::fnmad, - nan_option); -} - -void MacroAssembler::Fnmls(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& za, - const ZRegister& zn, - const ZRegister& zm, - FPMacroNaNPropagationOption nan_option) { - VIXL_ASSERT(allow_macro_instructions_); - FPMulAddHelper(zd, - pg, - za, - zn, - zm, - &Assembler::fnmls, - &Assembler::fnmsb, - nan_option); -} - -void MacroAssembler::Ftmad(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - int imm3) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); - Mov(scratch, zm); - MovprfxHelperScope guard(this, zd, zn); - ftmad(zd, zd, scratch, imm3); - } else { - MovprfxHelperScope guard(this, zd, zn); - ftmad(zd, zd, zm, imm3); - } -} - -void MacroAssembler::Fcadd(const ZRegister& zd, - const PRegisterM& pg, - const ZRegister& zn, - const ZRegister& zm, - int rot) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, pg, zn); - fcadd(scratch, pg, scratch, zm, rot); - } - Mov(zd, scratch); - } else { - MovprfxHelperScope guard(this, zd, pg, zn); - fcadd(zd, pg, zd, zm, rot); - } -} - -void MacroAssembler::Ext(const ZRegister& zd, - const ZRegister& zn, - const ZRegister& zm, - unsigned offset) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - // zd = ext(zn, zd, offset) - UseScratchRegisterScope temps(this); - ZRegister scratch = 
temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, zn); - ext(scratch, scratch, zm, offset); - } - Mov(zd, scratch); - } else { - // zd = ext(zn, zm, offset) - // zd = ext(zd, zd, offset) - MovprfxHelperScope guard(this, zd, zn); - ext(zd, zd, zm, offset); - } -} - -void MacroAssembler::Splice(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, zn); - splice(scratch, pg, scratch, zm); - } - Mov(zd, scratch); - } else { - MovprfxHelperScope guard(this, zd, zn); - splice(zd, pg, zd, zm); - } -} - -void MacroAssembler::Clasta(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, zn); - clasta(scratch, pg, scratch, zm); - } - Mov(zd, scratch); - } else { - MovprfxHelperScope guard(this, zd, zn); - clasta(zd, pg, zd, zm); - } -} - -void MacroAssembler::Clastb(const ZRegister& zd, - const PRegister& pg, - const ZRegister& zn, - const ZRegister& zm) { - VIXL_ASSERT(allow_macro_instructions_); - if (zd.Aliases(zm) && !zd.Aliases(zn)) { - UseScratchRegisterScope temps(this); - ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); - { - MovprfxHelperScope guard(this, scratch, zn); - clastb(scratch, pg, scratch, zm); - } - Mov(zd, scratch); - } else { - MovprfxHelperScope guard(this, zd, zn); - clastb(zd, pg, zd, zm); - } -} - -} // namespace aarch64 -} // namespace vixl diff --git a/src/aarch64/operands-aarch64.cc b/src/aarch64/operands-aarch64.cc index 008179e4..20364616 100644 --- 
a/src/aarch64/operands-aarch64.cc +++ b/src/aarch64/operands-aarch64.cc @@ -30,32 +30,32 @@ namespace vixl { namespace aarch64 { // CPURegList utilities. -CPURegister CPURegList::PopLowestIndex(RegList mask) { - RegList list = list_ & mask; - if (list == 0) return NoCPUReg; - int index = CountTrailingZeros(list); - VIXL_ASSERT(((1 << index) & list) != 0); +CPURegister CPURegList::PopLowestIndex() { + if (IsEmpty()) { + return NoCPUReg; + } + int index = CountTrailingZeros(list_); + VIXL_ASSERT((1 << index) & list_); Remove(index); return CPURegister(index, size_, type_); } -CPURegister CPURegList::PopHighestIndex(RegList mask) { - RegList list = list_ & mask; - if (list == 0) return NoCPUReg; - int index = CountLeadingZeros(list); +CPURegister CPURegList::PopHighestIndex() { + VIXL_ASSERT(IsValid()); + if (IsEmpty()) { + return NoCPUReg; + } + int index = CountLeadingZeros(list_); index = kRegListSizeInBits - 1 - index; - VIXL_ASSERT(((1 << index) & list) != 0); + VIXL_ASSERT((1 << index) & list_); Remove(index); return CPURegister(index, size_, type_); } bool CPURegList::IsValid() const { - if (type_ == CPURegister::kNoRegister) { - // We can't use IsEmpty here because that asserts IsValid(). - return list_ == 0; - } else { + if ((type_ == CPURegister::kRegister) || (type_ == CPURegister::kVRegister)) { bool is_valid = true; // Try to create a CPURegister for each element in the list. for (int i = 0; i < kRegListSizeInBits; i++) { @@ -64,6 +64,11 @@ bool CPURegList::IsValid() const { } } return is_valid; + } else if (type_ == CPURegister::kNoRegister) { + // We can't use IsEmpty here because that asserts IsValid(). + return list_ == 0; + } else { + return false; } } @@ -144,6 +149,145 @@ const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV(); const CPURegList kCallerSaved = CPURegList::GetCallerSaved(); const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV(); + +// Registers. 
+#define WREG(n) w##n, +const Register Register::wregisters[] = {AARCH64_REGISTER_CODE_LIST(WREG)}; +#undef WREG + +#define XREG(n) x##n, +const Register Register::xregisters[] = {AARCH64_REGISTER_CODE_LIST(XREG)}; +#undef XREG + +#define BREG(n) b##n, +const VRegister VRegister::bregisters[] = {AARCH64_REGISTER_CODE_LIST(BREG)}; +#undef BREG + +#define HREG(n) h##n, +const VRegister VRegister::hregisters[] = {AARCH64_REGISTER_CODE_LIST(HREG)}; +#undef HREG + +#define SREG(n) s##n, +const VRegister VRegister::sregisters[] = {AARCH64_REGISTER_CODE_LIST(SREG)}; +#undef SREG + +#define DREG(n) d##n, +const VRegister VRegister::dregisters[] = {AARCH64_REGISTER_CODE_LIST(DREG)}; +#undef DREG + +#define QREG(n) q##n, +const VRegister VRegister::qregisters[] = {AARCH64_REGISTER_CODE_LIST(QREG)}; +#undef QREG + +#define VREG(n) v##n, +const VRegister VRegister::vregisters[] = {AARCH64_REGISTER_CODE_LIST(VREG)}; +#undef VREG + + +const Register& Register::GetWRegFromCode(unsigned code) { + if (code == kSPRegInternalCode) { + return wsp; + } else { + VIXL_ASSERT(code < kNumberOfRegisters); + return wregisters[code]; + } +} + + +const Register& Register::GetXRegFromCode(unsigned code) { + if (code == kSPRegInternalCode) { + return sp; + } else { + VIXL_ASSERT(code < kNumberOfRegisters); + return xregisters[code]; + } +} + + +const VRegister& VRegister::GetBRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return bregisters[code]; +} + + +const VRegister& VRegister::GetHRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return hregisters[code]; +} + + +const VRegister& VRegister::GetSRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return sregisters[code]; +} + + +const VRegister& VRegister::GetDRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return dregisters[code]; +} + + +const VRegister& VRegister::GetQRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + 
return qregisters[code]; +} + + +const VRegister& VRegister::GetVRegFromCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return vregisters[code]; +} + + +const Register& CPURegister::W() const { + VIXL_ASSERT(IsValidRegister()); + return Register::GetWRegFromCode(code_); +} + + +const Register& CPURegister::X() const { + VIXL_ASSERT(IsValidRegister()); + return Register::GetXRegFromCode(code_); +} + + +const VRegister& CPURegister::B() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::GetBRegFromCode(code_); +} + + +const VRegister& CPURegister::H() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::GetHRegFromCode(code_); +} + + +const VRegister& CPURegister::S() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::GetSRegFromCode(code_); +} + + +const VRegister& CPURegister::D() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::GetDRegFromCode(code_); +} + + +const VRegister& CPURegister::Q() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::GetQRegFromCode(code_); +} + + +const VRegister& CPURegister::V() const { + VIXL_ASSERT(IsValidVRegister()); + return VRegister::GetVRegFromCode(code_); +} + + // Operand. Operand::Operand(int64_t immediate) : immediate_(immediate), @@ -152,12 +296,6 @@ Operand::Operand(int64_t immediate) extend_(NO_EXTEND), shift_amount_(0) {} -Operand::Operand(IntegerOperand immediate) - : immediate_(immediate.AsIntN(64)), - reg_(NoReg), - shift_(NO_SHIFT), - extend_(NO_EXTEND), - shift_amount_(0) {} Operand::Operand(Register reg, Shift shift, unsigned shift_amount) : reg_(reg), @@ -333,24 +471,6 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode) } -bool MemOperand::IsPlainRegister() const { - return IsImmediateOffset() && (GetOffset() == 0); -} - - -bool MemOperand::IsEquivalentToPlainRegister() const { - if (regoffset_.Is(NoReg)) { - // Immediate offset, pre-index or post-index. 
- return GetOffset() == 0; - } else if (GetRegisterOffset().IsZero()) { - // Zero register offset, pre-index or post-index. - // We can ignore shift and extend options because they all result in zero. - return true; - } - return false; -} - - bool MemOperand::IsImmediateOffset() const { return (addrmode_ == Offset) && regoffset_.Is(NoReg); } @@ -373,62 +493,6 @@ void MemOperand::AddOffset(int64_t offset) { } -bool SVEMemOperand::IsValid() const { -#ifdef VIXL_DEBUG - { - // It should not be possible for an SVEMemOperand to match multiple types. - int count = 0; - if (IsScalarPlusImmediate()) count++; - if (IsScalarPlusScalar()) count++; - if (IsScalarPlusVector()) count++; - if (IsVectorPlusImmediate()) count++; - if (IsVectorPlusVector()) count++; - VIXL_ASSERT(count <= 1); - } -#endif - - // We can't have a register _and_ an immediate offset. - if ((offset_ != 0) && (!regoffset_.IsNone())) return false; - - if (shift_amount_ != 0) { - // Only shift and extend modifiers can take a shift amount. - switch (mod_) { - case NO_SVE_OFFSET_MODIFIER: - case SVE_MUL_VL: - return false; - case SVE_LSL: - case SVE_UXTW: - case SVE_SXTW: - // Fall through. - break; - } - } - - return IsScalarPlusImmediate() || IsScalarPlusScalar() || - IsScalarPlusVector() || IsVectorPlusImmediate() || - IsVectorPlusVector(); -} - - -bool SVEMemOperand::IsEquivalentToScalar() const { - if (IsScalarPlusImmediate()) { - return GetImmediateOffset() == 0; - } - if (IsScalarPlusScalar()) { - // We can ignore the shift because it will still result in zero. - return GetScalarOffset().IsZero(); - } - // Forms involving vectors are never equivalent to a single scalar. 
- return false; -} - -bool SVEMemOperand::IsPlainRegister() const { - if (IsScalarPlusImmediate()) { - return GetImmediateOffset() == 0; - } - return false; -} - GenericOperand::GenericOperand(const CPURegister& reg) : cpu_register_(reg), mem_op_size_(0) { if (reg.IsQ()) { diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h index ad03a9ee..bfc6b702 100644 --- a/src/aarch64/operands-aarch64.h +++ b/src/aarch64/operands-aarch64.h @@ -27,15 +27,525 @@ #ifndef VIXL_AARCH64_OPERANDS_AARCH64_H_ #define VIXL_AARCH64_OPERANDS_AARCH64_H_ -#include <sstream> -#include <string> - #include "instructions-aarch64.h" -#include "registers-aarch64.h" namespace vixl { namespace aarch64 { +typedef uint64_t RegList; +static const int kRegListSizeInBits = sizeof(RegList) * 8; + + +// Registers. + +// Some CPURegister methods can return Register or VRegister types, so we need +// to declare them in advance. +class Register; +class VRegister; + +class CPURegister { + public: + enum RegisterType { + // The kInvalid value is used to detect uninitialized static instances, + // which are always zero-initialized before any constructors are called. + kInvalid = 0, + kRegister, + kVRegister, + kNoRegister + }; + + CPURegister() : code_(0), size_(0), type_(kNoRegister) { + VIXL_ASSERT(!IsValid()); + VIXL_ASSERT(IsNone()); + } + + CPURegister(unsigned code, unsigned size, RegisterType type) + : code_(code), size_(size), type_(type) { + VIXL_ASSERT(IsValidOrNone()); + } + + unsigned GetCode() const { + VIXL_ASSERT(IsValid()); + return code_; + } + VIXL_DEPRECATED("GetCode", unsigned code() const) { return GetCode(); } + + RegisterType GetType() const { + VIXL_ASSERT(IsValidOrNone()); + return type_; + } + VIXL_DEPRECATED("GetType", RegisterType type() const) { return GetType(); } + + RegList GetBit() const { + VIXL_ASSERT(code_ < (sizeof(RegList) * 8)); + return IsValid() ? 
(static_cast<RegList>(1) << code_) : 0; + } + VIXL_DEPRECATED("GetBit", RegList Bit() const) { return GetBit(); } + + int GetSizeInBytes() const { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(size_ % 8 == 0); + return size_ / 8; + } + VIXL_DEPRECATED("GetSizeInBytes", int SizeInBytes() const) { + return GetSizeInBytes(); + } + + int GetSizeInBits() const { + VIXL_ASSERT(IsValid()); + return size_; + } + VIXL_DEPRECATED("GetSizeInBits", unsigned size() const) { + return GetSizeInBits(); + } + VIXL_DEPRECATED("GetSizeInBits", int SizeInBits() const) { + return GetSizeInBits(); + } + + bool Is8Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 8; + } + + bool Is16Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 16; + } + + bool Is32Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 32; + } + + bool Is64Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 64; + } + + bool Is128Bits() const { + VIXL_ASSERT(IsValid()); + return size_ == 128; + } + + bool IsValid() const { + if (IsValidRegister() || IsValidVRegister()) { + VIXL_ASSERT(!IsNone()); + return true; + } else { + // This assert is hit when the register has not been properly initialized. + // One cause for this can be an initialisation order fiasco. See + // https://isocpp.org/wiki/faq/ctors#static-init-order for some details. + VIXL_ASSERT(IsNone()); + return false; + } + } + + bool IsValidRegister() const { + return IsRegister() && ((size_ == kWRegSize) || (size_ == kXRegSize)) && + ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)); + } + + bool IsValidVRegister() const { + return IsVRegister() && ((size_ == kBRegSize) || (size_ == kHRegSize) || + (size_ == kSRegSize) || (size_ == kDRegSize) || + (size_ == kQRegSize)) && + (code_ < kNumberOfVRegisters); + } + + bool IsValidFPRegister() const { + return IsValidVRegister() && IsFPRegister(); + } + + bool IsNone() const { + // kNoRegister types should always have size 0 and code 0. 
+ VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0)); + VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0)); + + return type_ == kNoRegister; + } + + bool Aliases(const CPURegister& other) const { + VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); + return (code_ == other.code_) && (type_ == other.type_); + } + + bool Is(const CPURegister& other) const { + VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); + return Aliases(other) && (size_ == other.size_); + } + + bool IsZero() const { + VIXL_ASSERT(IsValid()); + return IsRegister() && (code_ == kZeroRegCode); + } + + bool IsSP() const { + VIXL_ASSERT(IsValid()); + return IsRegister() && (code_ == kSPRegInternalCode); + } + + bool IsRegister() const { return type_ == kRegister; } + + bool IsVRegister() const { return type_ == kVRegister; } + + // CPURegister does not track lanes like VRegister does, so we have to assume + // that we have scalar types here. + // TODO: Encode lane information in CPURegister so that we can be consistent. + bool IsFPRegister() const { return IsH() || IsS() || IsD(); } + + bool IsW() const { return IsValidRegister() && Is32Bits(); } + bool IsX() const { return IsValidRegister() && Is64Bits(); } + + // These assertions ensure that the size and type of the register are as + // described. They do not consider the number of lanes that make up a vector. + // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD() + // does not imply Is1D() or Is8B(). + // Check the number of lanes, ie. the format of the vector, using methods such + // as Is8B(), Is1D(), etc. in the VRegister class. + bool IsV() const { return IsVRegister(); } + bool IsB() const { return IsV() && Is8Bits(); } + bool IsH() const { return IsV() && Is16Bits(); } + bool IsS() const { return IsV() && Is32Bits(); } + bool IsD() const { return IsV() && Is64Bits(); } + bool IsQ() const { return IsV() && Is128Bits(); } + + // Semantic type for sdot and udot instructions. 
+ bool IsS4B() const { return IsS(); } + const VRegister& S4B() const { return S(); } + + const Register& W() const; + const Register& X() const; + const VRegister& V() const; + const VRegister& B() const; + const VRegister& H() const; + const VRegister& S() const; + const VRegister& D() const; + const VRegister& Q() const; + + bool IsSameType(const CPURegister& other) const { + return type_ == other.type_; + } + + bool IsSameSizeAndType(const CPURegister& other) const { + return (size_ == other.size_) && IsSameType(other); + } + + protected: + unsigned code_; + int size_; + RegisterType type_; + + private: + bool IsValidOrNone() const { return IsValid() || IsNone(); } +}; + + +class Register : public CPURegister { + public: + Register() : CPURegister() {} + explicit Register(const CPURegister& other) + : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()) { + VIXL_ASSERT(IsValidRegister()); + } + Register(unsigned code, unsigned size) : CPURegister(code, size, kRegister) {} + + bool IsValid() const { + VIXL_ASSERT(IsRegister() || IsNone()); + return IsValidRegister(); + } + + static const Register& GetWRegFromCode(unsigned code); + VIXL_DEPRECATED("GetWRegFromCode", + static const Register& WRegFromCode(unsigned code)) { + return GetWRegFromCode(code); + } + + static const Register& GetXRegFromCode(unsigned code); + VIXL_DEPRECATED("GetXRegFromCode", + static const Register& XRegFromCode(unsigned code)) { + return GetXRegFromCode(code); + } + + private: + static const Register wregisters[]; + static const Register xregisters[]; +}; + + +namespace internal { + +template <int size_in_bits> +class FixedSizeRegister : public Register { + public: + FixedSizeRegister() : Register() {} + explicit FixedSizeRegister(unsigned code) : Register(code, size_in_bits) { + VIXL_ASSERT(IsValidRegister()); + } + explicit FixedSizeRegister(const Register& other) + : Register(other.GetCode(), size_in_bits) { + VIXL_ASSERT(other.GetSizeInBits() == size_in_bits); + 
VIXL_ASSERT(IsValidRegister()); + } + explicit FixedSizeRegister(const CPURegister& other) + : Register(other.GetCode(), other.GetSizeInBits()) { + VIXL_ASSERT(other.GetType() == kRegister); + VIXL_ASSERT(other.GetSizeInBits() == size_in_bits); + VIXL_ASSERT(IsValidRegister()); + } + + bool IsValid() const { + return Register::IsValid() && (GetSizeInBits() == size_in_bits); + } +}; + +} // namespace internal + +typedef internal::FixedSizeRegister<kXRegSize> XRegister; +typedef internal::FixedSizeRegister<kWRegSize> WRegister; + + +class VRegister : public CPURegister { + public: + VRegister() : CPURegister(), lanes_(1) {} + explicit VRegister(const CPURegister& other) + : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()), + lanes_(1) { + VIXL_ASSERT(IsValidVRegister()); + VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); + } + VRegister(unsigned code, unsigned size, unsigned lanes = 1) + : CPURegister(code, size, kVRegister), lanes_(lanes) { + VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); + } + VRegister(unsigned code, VectorFormat format) + : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister), + lanes_(IsVectorFormat(format) ? 
LaneCountFromFormat(format) : 1) { + VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); + } + + bool IsValid() const { + VIXL_ASSERT(IsVRegister() || IsNone()); + return IsValidVRegister(); + } + + static const VRegister& GetBRegFromCode(unsigned code); + VIXL_DEPRECATED("GetBRegFromCode", + static const VRegister& BRegFromCode(unsigned code)) { + return GetBRegFromCode(code); + } + + static const VRegister& GetHRegFromCode(unsigned code); + VIXL_DEPRECATED("GetHRegFromCode", + static const VRegister& HRegFromCode(unsigned code)) { + return GetHRegFromCode(code); + } + + static const VRegister& GetSRegFromCode(unsigned code); + VIXL_DEPRECATED("GetSRegFromCode", + static const VRegister& SRegFromCode(unsigned code)) { + return GetSRegFromCode(code); + } + + static const VRegister& GetDRegFromCode(unsigned code); + VIXL_DEPRECATED("GetDRegFromCode", + static const VRegister& DRegFromCode(unsigned code)) { + return GetDRegFromCode(code); + } + + static const VRegister& GetQRegFromCode(unsigned code); + VIXL_DEPRECATED("GetQRegFromCode", + static const VRegister& QRegFromCode(unsigned code)) { + return GetQRegFromCode(code); + } + + static const VRegister& GetVRegFromCode(unsigned code); + VIXL_DEPRECATED("GetVRegFromCode", + static const VRegister& VRegFromCode(unsigned code)) { + return GetVRegFromCode(code); + } + + VRegister V8B() const { return VRegister(code_, kDRegSize, 8); } + VRegister V16B() const { return VRegister(code_, kQRegSize, 16); } + VRegister V2H() const { return VRegister(code_, kSRegSize, 2); } + VRegister V4H() const { return VRegister(code_, kDRegSize, 4); } + VRegister V8H() const { return VRegister(code_, kQRegSize, 8); } + VRegister V2S() const { return VRegister(code_, kDRegSize, 2); } + VRegister V4S() const { return VRegister(code_, kQRegSize, 4); } + VRegister V2D() const { return VRegister(code_, kQRegSize, 2); } + VRegister V1D() const { return VRegister(code_, kDRegSize, 1); } + + bool Is8B() const { return (Is64Bits() && (lanes_ == 
8)); } + bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); } + bool Is2H() const { return (Is32Bits() && (lanes_ == 2)); } + bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); } + bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); } + bool Is1S() const { return (Is32Bits() && (lanes_ == 1)); } + bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); } + bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); } + bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); } + bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); } + + // For consistency, we assert the number of lanes of these scalar registers, + // even though there are no vectors of equivalent total size with which they + // could alias. + bool Is1B() const { + VIXL_ASSERT(!(Is8Bits() && IsVector())); + return Is8Bits(); + } + bool Is1H() const { + VIXL_ASSERT(!(Is16Bits() && IsVector())); + return Is16Bits(); + } + + // Semantic type for sdot and udot instructions. + bool Is1S4B() const { return Is1S(); } + + + bool IsLaneSizeB() const { return GetLaneSizeInBits() == kBRegSize; } + bool IsLaneSizeH() const { return GetLaneSizeInBits() == kHRegSize; } + bool IsLaneSizeS() const { return GetLaneSizeInBits() == kSRegSize; } + bool IsLaneSizeD() const { return GetLaneSizeInBits() == kDRegSize; } + + int GetLanes() const { return lanes_; } + VIXL_DEPRECATED("GetLanes", int lanes() const) { return GetLanes(); } + + bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); } + bool IsValidFPRegister() const { + return IsValidVRegister() && IsFPRegister(); + } + + bool IsScalar() const { return lanes_ == 1; } + + bool IsVector() const { return lanes_ > 1; } + + bool IsSameFormat(const VRegister& other) const { + return (size_ == other.size_) && (lanes_ == other.lanes_); + } + + unsigned GetLaneSizeInBytes() const { return GetSizeInBytes() / lanes_; } + VIXL_DEPRECATED("GetLaneSizeInBytes", unsigned LaneSizeInBytes() const) { + return GetLaneSizeInBytes(); + } + + 
unsigned GetLaneSizeInBits() const { return GetLaneSizeInBytes() * 8; } + VIXL_DEPRECATED("GetLaneSizeInBits", unsigned LaneSizeInBits() const) { + return GetLaneSizeInBits(); + } + + private: + static const VRegister bregisters[]; + static const VRegister hregisters[]; + static const VRegister sregisters[]; + static const VRegister dregisters[]; + static const VRegister qregisters[]; + static const VRegister vregisters[]; + int lanes_; +}; + + +// No*Reg is used to indicate an unused argument, or an error case. Note that +// these all compare equal (using the Is() method). The Register and VRegister +// variants are provided for convenience. +const Register NoReg; +const VRegister NoVReg; +const CPURegister NoCPUReg; + + +#define DEFINE_REGISTERS(N) \ + const WRegister w##N(N); \ + const XRegister x##N(N); +AARCH64_REGISTER_CODE_LIST(DEFINE_REGISTERS) +#undef DEFINE_REGISTERS +const WRegister wsp(kSPRegInternalCode); +const XRegister sp(kSPRegInternalCode); + + +#define DEFINE_VREGISTERS(N) \ + const VRegister b##N(N, kBRegSize); \ + const VRegister h##N(N, kHRegSize); \ + const VRegister s##N(N, kSRegSize); \ + const VRegister d##N(N, kDRegSize); \ + const VRegister q##N(N, kQRegSize); \ + const VRegister v##N(N, kQRegSize); +AARCH64_REGISTER_CODE_LIST(DEFINE_VREGISTERS) +#undef DEFINE_VREGISTERS + + +// Register aliases. +const XRegister ip0 = x16; +const XRegister ip1 = x17; +const XRegister lr = x30; +const XRegister xzr = x31; +const WRegister wzr = w31; + + +// AreAliased returns true if any of the named registers overlap. Arguments +// set to NoReg are ignored. The system stack pointer may be specified. 
+bool AreAliased(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + + +// AreSameSizeAndType returns true if all of the specified registers have the +// same size, and are of the same type. The system stack pointer may be +// specified. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreSameSizeAndType(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg, + const CPURegister& reg5 = NoCPUReg, + const CPURegister& reg6 = NoCPUReg, + const CPURegister& reg7 = NoCPUReg, + const CPURegister& reg8 = NoCPUReg); + +// AreEven returns true if all of the specified registers have even register +// indices. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreEven(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + + +// AreConsecutive returns true if all of the specified registers are +// consecutive in the register file. Arguments set to NoReg are ignored, as are +// any subsequent arguments. At least one argument (reg1) must be valid +// (not NoCPUReg). +bool AreConsecutive(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + + +// AreSameFormat returns true if all of the specified VRegisters have the same +// vector format. Arguments set to NoReg are ignored, as are any subsequent +// arguments. 
At least one argument (reg1) must be valid (not NoVReg). +bool AreSameFormat(const VRegister& reg1, + const VRegister& reg2, + const VRegister& reg3 = NoVReg, + const VRegister& reg4 = NoVReg); + + +// AreConsecutive returns true if all of the specified VRegisters are +// consecutive in the register file. Arguments set to NoReg are ignored, as are +// any subsequent arguments. At least one argument (reg1) must be valid +// (not NoVReg). +bool AreConsecutive(const VRegister& reg1, + const VRegister& reg2, + const VRegister& reg3 = NoVReg, + const VRegister& reg4 = NoVReg); + + // Lists of registers. class CPURegList { public: @@ -70,28 +580,6 @@ class CPURegList { VIXL_ASSERT(IsValid()); } - // Construct an empty CPURegList with the specified size and type. If `size` - // is CPURegister::kUnknownSize and the register type requires a size, a valid - // but unspecified default will be picked. - static CPURegList Empty(CPURegister::RegisterType type, - unsigned size = CPURegister::kUnknownSize) { - return CPURegList(type, GetDefaultSizeFor(type, size), 0); - } - - // Construct a CPURegList with all possible registers with the specified size - // and type. If `size` is CPURegister::kUnknownSize and the register type - // requires a size, a valid but unspecified default will be picked. - static CPURegList All(CPURegister::RegisterType type, - unsigned size = CPURegister::kUnknownSize) { - unsigned number_of_registers = (CPURegister::GetMaxCodeFor(type) + 1); - RegList list = (static_cast<RegList>(1) << number_of_registers) - 1; - if (type == CPURegister::kRegister) { - // GetMaxCodeFor(kRegister) ignores SP, so explicitly include it. 
- list |= (static_cast<RegList>(1) << kSPRegInternalCode); - } - return CPURegList(type, GetDefaultSizeFor(type, size), list); - } - CPURegister::RegisterType GetType() const { VIXL_ASSERT(IsValid()); return type_; @@ -100,10 +588,6 @@ class CPURegList { return GetType(); } - CPURegister::RegisterBank GetBank() const { - return CPURegister::GetBankFor(GetType()); - } - // Combine another CPURegList into this one. Registers that already exist in // this list are left unchanged. The type and size of the registers in the // 'other' list must match those in this list. @@ -200,11 +684,8 @@ class CPURegList { // preparing registers for an AAPCS64 function call, for example. void RemoveCalleeSaved(); - // Find the register in this list that appears in `mask` with the lowest or - // highest code, remove it from the list and return it as a CPURegister. If - // the list is empty, leave it unchanged and return NoCPUReg. - CPURegister PopLowestIndex(RegList mask = ~static_cast<RegList>(0)); - CPURegister PopHighestIndex(RegList mask = ~static_cast<RegList>(0)); + CPURegister PopLowestIndex(); + CPURegister PopHighestIndex(); // AAPCS64 callee-saved registers. static CPURegList GetCalleeSaved(unsigned size = kXRegSize); @@ -223,7 +704,7 @@ class CPURegList { bool IncludesAliasOf(const CPURegister& other) const { VIXL_ASSERT(IsValid()); - return (GetBank() == other.GetBank()) && IncludesAliasOf(other.GetCode()); + return (type_ == other.GetType()) && IncludesAliasOf(other.GetCode()); } bool IncludesAliasOf(int code) const { @@ -263,21 +744,6 @@ class CPURegList { } private: - // If `size` is CPURegister::kUnknownSize and the type requires a known size, - // then return an arbitrary-but-valid size. - // - // Otherwise, the size is checked for validity and returned unchanged. 
- static unsigned GetDefaultSizeFor(CPURegister::RegisterType type, - unsigned size) { - if (size == CPURegister::kUnknownSize) { - if (type == CPURegister::kRegister) size = kXRegSize; - if (type == CPURegister::kVRegister) size = kQRegSize; - // All other types require kUnknownSize. - } - VIXL_ASSERT(CPURegister(0, size, type).IsValid()); - return size; - } - RegList list_; int size_; CPURegister::RegisterType type_; @@ -295,7 +761,6 @@ extern const CPURegList kCalleeSavedV; extern const CPURegList kCallerSaved; extern const CPURegList kCallerSavedV; -class IntegerOperand; // Operand. class Operand { @@ -304,9 +769,7 @@ class Operand { // where <immediate> is int64_t. // This is allowed to be an implicit constructor because Operand is // a wrapper class that doesn't normally perform any type conversion. - Operand(int64_t immediate); // NOLINT(runtime/explicit) - - Operand(IntegerOperand immediate); // NOLINT(runtime/explicit) + Operand(int64_t immediate = 0); // NOLINT(runtime/explicit) // rm, {<shift> #<shift_amount>} // where <shift> is one of {LSL, LSR, ASR, ROR}. @@ -420,16 +883,6 @@ class MemOperand { return shift_amount_; } - // True for MemOperands which represent something like [x0]. - // Currently, this will also return true for [x0, #0], because MemOperand has - // no way to distinguish the two. - bool IsPlainRegister() const; - - // True for MemOperands which represent something like [x0], or for compound - // MemOperands which are functionally equivalent, such as [x0, #0], [x0, xzr] - // or [x0, wzr, UXTW #3]. - bool IsEquivalentToPlainRegister() const; - // True for immediate-offset (but not indexed) MemOperands. bool IsImmediateOffset() const; // True for register-offset (but not indexed) MemOperands. @@ -465,448 +918,6 @@ class MemOperand { unsigned shift_amount_; }; -// SVE supports memory operands which don't make sense to the core ISA, such as -// scatter-gather forms, in which either the base or offset registers are -// vectors. 
This class exists to avoid complicating core-ISA code with -// SVE-specific behaviour. -// -// Note that SVE does not support any pre- or post-index modes. -class SVEMemOperand { - public: - // "vector-plus-immediate", like [z0.s, #21] - explicit SVEMemOperand(ZRegister base, uint64_t offset = 0) - : base_(base), - regoffset_(NoReg), - offset_(RawbitsToInt64(offset)), - mod_(NO_SVE_OFFSET_MODIFIER), - shift_amount_(0) { - VIXL_ASSERT(IsVectorPlusImmediate()); - VIXL_ASSERT(IsValid()); - } - - // "scalar-plus-immediate", like [x0], [x0, #42] or [x0, #42, MUL_VL] - // The only supported modifiers are NO_SVE_OFFSET_MODIFIER or SVE_MUL_VL. - // - // Note that VIXL cannot currently distinguish between `SVEMemOperand(x0)` and - // `SVEMemOperand(x0, 0)`. This is only significant in scalar-plus-scalar - // instructions where xm defaults to xzr. However, users should not rely on - // `SVEMemOperand(x0, 0)` being accepted in such cases. - explicit SVEMemOperand(Register base, - uint64_t offset = 0, - SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER) - : base_(base), - regoffset_(NoReg), - offset_(RawbitsToInt64(offset)), - mod_(mod), - shift_amount_(0) { - VIXL_ASSERT(IsScalarPlusImmediate()); - VIXL_ASSERT(IsValid()); - } - - // "scalar-plus-scalar", like [x0, x1] - // "scalar-plus-vector", like [x0, z1.d] - SVEMemOperand(Register base, CPURegister offset) - : base_(base), - regoffset_(offset), - offset_(0), - mod_(NO_SVE_OFFSET_MODIFIER), - shift_amount_(0) { - VIXL_ASSERT(IsScalarPlusScalar() || IsScalarPlusVector()); - if (offset.IsZero()) VIXL_ASSERT(IsEquivalentToScalar()); - VIXL_ASSERT(IsValid()); - } - - // "scalar-plus-vector", like [x0, z1.d, UXTW] - // The type of `mod` can be any `SVEOffsetModifier` (other than LSL), or a - // corresponding `Extend` value. 
- template <typename M> - SVEMemOperand(Register base, ZRegister offset, M mod) - : base_(base), - regoffset_(offset), - offset_(0), - mod_(GetSVEOffsetModifierFor(mod)), - shift_amount_(0) { - VIXL_ASSERT(mod_ != SVE_LSL); // LSL requires an explicit shift amount. - VIXL_ASSERT(IsScalarPlusVector()); - VIXL_ASSERT(IsValid()); - } - - // "scalar-plus-scalar", like [x0, x1, LSL #1] - // "scalar-plus-vector", like [x0, z1.d, LSL #2] - // The type of `mod` can be any `SVEOffsetModifier`, or a corresponding - // `Shift` or `Extend` value. - template <typename M> - SVEMemOperand(Register base, CPURegister offset, M mod, unsigned shift_amount) - : base_(base), - regoffset_(offset), - offset_(0), - mod_(GetSVEOffsetModifierFor(mod)), - shift_amount_(shift_amount) { - VIXL_ASSERT(IsValid()); - } - - // "vector-plus-vector", like [z0.d, z1.d, UXTW] - template <typename M = SVEOffsetModifier> - SVEMemOperand(ZRegister base, - ZRegister offset, - M mod = NO_SVE_OFFSET_MODIFIER, - unsigned shift_amount = 0) - : base_(base), - regoffset_(offset), - offset_(0), - mod_(GetSVEOffsetModifierFor(mod)), - shift_amount_(shift_amount) { - VIXL_ASSERT(IsValid()); - VIXL_ASSERT(IsVectorPlusVector()); - } - - // True for SVEMemOperands which represent something like [x0]. - // This will also return true for [x0, #0], because there is no way - // to distinguish the two. - bool IsPlainScalar() const { - return IsScalarPlusImmediate() && (offset_ == 0); - } - - // True for SVEMemOperands which represent something like [x0], or for - // compound SVEMemOperands which are functionally equivalent, such as - // [x0, #0], [x0, xzr] or [x0, wzr, UXTW #3]. - bool IsEquivalentToScalar() const; - - // True for SVEMemOperands like [x0], [x0, #0], false for [x0, xzr] and - // similar. 
- bool IsPlainRegister() const; - - bool IsScalarPlusImmediate() const { - return base_.IsX() && regoffset_.IsNone() && - ((mod_ == NO_SVE_OFFSET_MODIFIER) || IsMulVl()); - } - - bool IsScalarPlusScalar() const { - // SVE offers no extend modes for scalar-plus-scalar, so both registers must - // be X registers. - return base_.IsX() && regoffset_.IsX() && - ((mod_ == NO_SVE_OFFSET_MODIFIER) || (mod_ == SVE_LSL)); - } - - bool IsScalarPlusVector() const { - // The modifier can be LSL or an an extend mode (UXTW or SXTW) here. Unlike - // in the core ISA, these extend modes do not imply an S-sized lane, so the - // modifier is independent from the lane size. The architecture describes - // [US]XTW with a D-sized lane as an "unpacked" offset. - return base_.IsX() && regoffset_.IsZRegister() && - (regoffset_.IsLaneSizeS() || regoffset_.IsLaneSizeD()) && !IsMulVl(); - } - - bool IsVectorPlusImmediate() const { - return base_.IsZRegister() && - (base_.IsLaneSizeS() || base_.IsLaneSizeD()) && - regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER); - } - - bool IsVectorPlusVector() const { - return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) && - AreSameFormat(base_, regoffset_) && - (base_.IsLaneSizeS() || base_.IsLaneSizeD()); - } - - bool IsContiguous() const { return !IsScatterGather(); } - bool IsScatterGather() const { - return base_.IsZRegister() || regoffset_.IsZRegister(); - } - - // TODO: If necessary, add helpers like `HasScalarBase()`. 
- - Register GetScalarBase() const { - VIXL_ASSERT(base_.IsX()); - return Register(base_); - } - - ZRegister GetVectorBase() const { - VIXL_ASSERT(base_.IsZRegister()); - VIXL_ASSERT(base_.HasLaneSize()); - return ZRegister(base_); - } - - Register GetScalarOffset() const { - VIXL_ASSERT(regoffset_.IsRegister()); - return Register(regoffset_); - } - - ZRegister GetVectorOffset() const { - VIXL_ASSERT(regoffset_.IsZRegister()); - VIXL_ASSERT(regoffset_.HasLaneSize()); - return ZRegister(regoffset_); - } - - int64_t GetImmediateOffset() const { - VIXL_ASSERT(regoffset_.IsNone()); - return offset_; - } - - SVEOffsetModifier GetOffsetModifier() const { return mod_; } - unsigned GetShiftAmount() const { return shift_amount_; } - - bool IsEquivalentToLSL(unsigned amount) const { - if (shift_amount_ != amount) return false; - if (amount == 0) { - // No-shift is equivalent to "LSL #0". - return ((mod_ == SVE_LSL) || (mod_ == NO_SVE_OFFSET_MODIFIER)); - } - return mod_ == SVE_LSL; - } - - bool IsMulVl() const { return mod_ == SVE_MUL_VL; } - - bool IsValid() const; - - private: - // Allow standard `Shift` and `Extend` arguments to be used. - SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) { - if (shift == LSL) return SVE_LSL; - if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER; - // SVE does not accept any other shift. - VIXL_UNIMPLEMENTED(); - return NO_SVE_OFFSET_MODIFIER; - } - - SVEOffsetModifier GetSVEOffsetModifierFor(Extend extend = NO_EXTEND) { - if (extend == UXTW) return SVE_UXTW; - if (extend == SXTW) return SVE_SXTW; - if (extend == NO_EXTEND) return NO_SVE_OFFSET_MODIFIER; - // SVE does not accept any other extend mode. - VIXL_UNIMPLEMENTED(); - return NO_SVE_OFFSET_MODIFIER; - } - - SVEOffsetModifier GetSVEOffsetModifierFor(SVEOffsetModifier mod) { - return mod; - } - - CPURegister base_; - CPURegister regoffset_; - int64_t offset_; - SVEOffsetModifier mod_; - unsigned shift_amount_; -}; - -// Represent a signed or unsigned integer operand. 
-// -// This is designed to make instructions which naturally accept a _signed_ -// immediate easier to implement and use, when we also want users to be able to -// specify raw-bits values (such as with hexadecimal constants). The advantage -// of this class over a simple uint64_t (with implicit C++ sign-extension) is -// that this class can strictly check the range of allowed values. With a simple -// uint64_t, it is impossible to distinguish -1 from UINT64_MAX. -// -// For example, these instructions are equivalent: -// -// __ Insr(z0.VnB(), -1); -// __ Insr(z0.VnB(), 0xff); -// -// ... as are these: -// -// __ Insr(z0.VnD(), -1); -// __ Insr(z0.VnD(), 0xffffffffffffffff); -// -// ... but this is invalid: -// -// __ Insr(z0.VnB(), 0xffffffffffffffff); // Too big for B-sized lanes. -class IntegerOperand { - public: -#define VIXL_INT_TYPES(V) \ - V(char) V(short) V(int) V(long) V(long long) // NOLINT(runtime/int) -#define VIXL_DECL_INT_OVERLOADS(T) \ - /* These are allowed to be implicit constructors because this is a */ \ - /* wrapper class that doesn't normally perform any type conversion. */ \ - IntegerOperand(signed T immediate) /* NOLINT(runtime/explicit) */ \ - : raw_bits_(immediate), /* Allow implicit sign-extension. */ \ - is_negative_(immediate < 0) {} \ - IntegerOperand(unsigned T immediate) /* NOLINT(runtime/explicit) */ \ - : raw_bits_(immediate), is_negative_(false) {} - VIXL_INT_TYPES(VIXL_DECL_INT_OVERLOADS) -#undef VIXL_DECL_INT_OVERLOADS -#undef VIXL_INT_TYPES - - // TODO: `Operand` can currently only hold an int64_t, so some large, unsigned - // values will be misrepresented here. - explicit IntegerOperand(const Operand& operand) - : raw_bits_(operand.GetEquivalentImmediate()), - is_negative_(operand.GetEquivalentImmediate() < 0) {} - - bool IsIntN(unsigned n) const { - return is_negative_ ? 
vixl::IsIntN(n, RawbitsToInt64(raw_bits_)) - : vixl::IsIntN(n, raw_bits_); - } - bool IsUintN(unsigned n) const { - return !is_negative_ && vixl::IsUintN(n, raw_bits_); - } - - bool IsUint8() const { return IsUintN(8); } - bool IsUint16() const { return IsUintN(16); } - bool IsUint32() const { return IsUintN(32); } - bool IsUint64() const { return IsUintN(64); } - - bool IsInt8() const { return IsIntN(8); } - bool IsInt16() const { return IsIntN(16); } - bool IsInt32() const { return IsIntN(32); } - bool IsInt64() const { return IsIntN(64); } - - bool FitsInBits(unsigned n) const { - return is_negative_ ? IsIntN(n) : IsUintN(n); - } - bool FitsInLane(const CPURegister& zd) const { - return FitsInBits(zd.GetLaneSizeInBits()); - } - bool FitsInSignedLane(const CPURegister& zd) const { - return IsIntN(zd.GetLaneSizeInBits()); - } - bool FitsInUnsignedLane(const CPURegister& zd) const { - return IsUintN(zd.GetLaneSizeInBits()); - } - - // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to an unsigned integer - // in the range [0, UINT<n>_MAX] (using two's complement mapping). - uint64_t AsUintN(unsigned n) const { - VIXL_ASSERT(FitsInBits(n)); - return raw_bits_ & GetUintMask(n); - } - - uint8_t AsUint8() const { return static_cast<uint8_t>(AsUintN(8)); } - uint16_t AsUint16() const { return static_cast<uint16_t>(AsUintN(16)); } - uint32_t AsUint32() const { return static_cast<uint32_t>(AsUintN(32)); } - uint64_t AsUint64() const { return AsUintN(64); } - - // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to a signed integer in - // the range [INT<n>_MIN, INT<n>_MAX] (using two's complement mapping). 
- int64_t AsIntN(unsigned n) const { - VIXL_ASSERT(FitsInBits(n)); - return ExtractSignedBitfield64(n - 1, 0, raw_bits_); - } - - int8_t AsInt8() const { return static_cast<int8_t>(AsIntN(8)); } - int16_t AsInt16() const { return static_cast<int16_t>(AsIntN(16)); } - int32_t AsInt32() const { return static_cast<int32_t>(AsIntN(32)); } - int64_t AsInt64() const { return AsIntN(64); } - - // Several instructions encode a signed int<N>_t, which is then (optionally) - // left-shifted and sign-extended to a Z register lane with a size which may - // be larger than N. This helper tries to find an int<N>_t such that the - // IntegerOperand's arithmetic value is reproduced in each lane. - // - // This is the mechanism that allows `Insr(z0.VnB(), 0xff)` to be treated as - // `Insr(z0.VnB(), -1)`. - template <unsigned N, unsigned kShift, typename T> - bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, T* imm) const { - VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N); - VIXL_ASSERT(FitsInLane(zd)); - if ((raw_bits_ & GetUintMask(kShift)) != 0) return false; - - // Reverse the specified left-shift. - IntegerOperand unshifted(*this); - unshifted.ArithmeticShiftRight(kShift); - - if (unshifted.IsIntN(N)) { - // This is trivial, since sign-extension produces the same arithmetic - // value irrespective of the destination size. - *imm = static_cast<T>(unshifted.AsIntN(N)); - return true; - } - - // Otherwise, we might be able to use the sign-extension to produce the - // desired bit pattern. We can only do this for values in the range - // [INT<N>_MAX + 1, UINT<N>_MAX], where the highest set bit is the sign bit. - // - // The lane size has to be adjusted to compensate for `kShift`, since the - // high bits will be dropped when the encoded value is left-shifted. 
- if (unshifted.IsUintN(zd.GetLaneSizeInBits() - kShift)) { - int64_t encoded = unshifted.AsIntN(zd.GetLaneSizeInBits() - kShift); - if (vixl::IsIntN(N, encoded)) { - *imm = static_cast<T>(encoded); - return true; - } - } - return false; - } - - // As above, but `kShift` is written to the `*shift` parameter on success, so - // that it is easy to chain calls like this: - // - // if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || - // imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { - // insn(zd, imm8, shift) - // } - template <unsigned N, unsigned kShift, typename T, typename S> - bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, - T* imm, - S* shift) const { - if (TryEncodeAsShiftedIntNForLane<N, kShift>(zd, imm)) { - *shift = kShift; - return true; - } - return false; - } - - // As above, but assume that `kShift` is 0. - template <unsigned N, typename T> - bool TryEncodeAsIntNForLane(const CPURegister& zd, T* imm) const { - return TryEncodeAsShiftedIntNForLane<N, 0>(zd, imm); - } - - // As above, but for unsigned fields. This is usuaully a simple operation, but - // is provided for symmetry. - template <unsigned N, unsigned kShift, typename T> - bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, T* imm) const { - VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N); - VIXL_ASSERT(FitsInLane(zd)); - - // TODO: Should we convert -1 to 0xff here? 
- if (is_negative_) return false; - USE(zd); - - if ((raw_bits_ & GetUintMask(kShift)) != 0) return false; - - if (vixl::IsUintN(N, raw_bits_ >> kShift)) { - *imm = static_cast<T>(raw_bits_ >> kShift); - return true; - } - return false; - } - - template <unsigned N, unsigned kShift, typename T, typename S> - bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, - T* imm, - S* shift) const { - if (TryEncodeAsShiftedUintNForLane<N, kShift>(zd, imm)) { - *shift = kShift; - return true; - } - return false; - } - - bool IsZero() const { return raw_bits_ == 0; } - bool IsNegative() const { return is_negative_; } - bool IsPositiveOrZero() const { return !is_negative_; } - - uint64_t GetMagnitude() const { - return is_negative_ ? -raw_bits_ : raw_bits_; - } - - private: - // Shift the arithmetic value right, with sign extension if is_negative_. - void ArithmeticShiftRight(int shift) { - VIXL_ASSERT((shift >= 0) && (shift < 64)); - if (shift == 0) return; - if (is_negative_) { - raw_bits_ = ExtractSignedBitfield64(63, shift, raw_bits_); - } else { - raw_bits_ >>= shift; - } - } - - uint64_t raw_bits_; - bool is_negative_; -}; - // This an abstraction that can represent a register or memory location. The // `MacroAssembler` provides helpers to move data between generic operands. class GenericOperand { diff --git a/src/aarch64/registers-aarch64.cc b/src/aarch64/registers-aarch64.cc deleted file mode 100644 index 735f43c7..00000000 --- a/src/aarch64/registers-aarch64.cc +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2019, VIXL authors -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of ARM Limited nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include <sstream> -#include <string> - -#include "registers-aarch64.h" - -namespace vixl { -namespace aarch64 { - -std::string CPURegister::GetArchitecturalName() const { - std::ostringstream name; - if (IsZRegister()) { - name << 'z' << GetCode(); - if (HasLaneSize()) { - name << '.' << GetLaneSizeSymbol(); - } - } else if (IsPRegister()) { - name << 'p' << GetCode(); - if (HasLaneSize()) { - name << '.' 
<< GetLaneSizeSymbol(); - } - switch (qualifiers_) { - case kNoQualifiers: - break; - case kMerging: - name << "/m"; - break; - case kZeroing: - name << "/z"; - break; - } - } else { - VIXL_UNIMPLEMENTED(); - } - return name.str(); -} - -unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) { - switch (bank) { - case kNoRegisterBank: - return 0; - case kRRegisterBank: - return Register::GetMaxCode(); - case kVRegisterBank: -#ifdef VIXL_HAS_CONSTEXPR - VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode()); -#else - VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode()); -#endif - return VRegister::GetMaxCode(); - case kPRegisterBank: - return PRegister::GetMaxCode(); - } - VIXL_UNREACHABLE(); - return 0; -} - -bool CPURegister::IsValidRegister() const { - return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) && - (bank_ == kRRegisterBank) && - ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) && - (qualifiers_ == kNoQualifiers) && (lane_size_ == size_); -} - -bool CPURegister::IsValidVRegister() const { - VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize); - return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) && - ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) && - (qualifiers_ == kNoQualifiers) && - (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_); -} - -bool CPURegister::IsValidFPRegister() const { - return IsValidVRegister() && IsFPRegister(); -} - -bool CPURegister::IsValidZRegister() const { - VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize); - // Z registers are valid with or without a lane size, so we don't need to - // check lane_size_. 
- return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) && - (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers); -} - -bool CPURegister::IsValidPRegister() const { - VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize); - // P registers are valid with or without a lane size, so we don't need to - // check lane_size_. - return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) && - (size_ == kEncodedUnknownSize) && - ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) || - (qualifiers_ == kZeroing)); -} - -bool CPURegister::IsValid() const { - return IsValidRegister() || IsValidVRegister() || IsValidZRegister() || - IsValidPRegister(); -} - -// Most coersions simply invoke the necessary constructor. -#define VIXL_CPUREG_COERCION_LIST(U) \ - U(Register, W, R) \ - U(Register, X, R) \ - U(VRegister, B, V) \ - U(VRegister, H, V) \ - U(VRegister, S, V) \ - U(VRegister, D, V) \ - U(VRegister, Q, V) \ - U(VRegister, V, V) \ - U(ZRegister, Z, V) \ - U(PRegister, P, P) -#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \ - RET_TYPE CPURegister::CTOR_TYPE() const { \ - VIXL_ASSERT(GetBank() == k##BANK##RegisterBank); \ - return CTOR_TYPE##Register(GetCode()); \ - } -VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION) -#undef VIXL_CPUREG_COERCION_LIST -#undef VIXL_DEFINE_CPUREG_COERCION - -// NEON lane-format coersions always return VRegisters. -#define VIXL_CPUREG_NEON_COERCION_LIST(V) \ - V(8, B) \ - V(16, B) \ - V(2, H) \ - V(4, H) \ - V(8, H) \ - V(2, S) \ - V(4, S) \ - V(1, D) \ - V(2, D) -#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE) \ - VRegister VRegister::V##LANES##LANE_TYPE() const { \ - VIXL_ASSERT(IsVRegister()); \ - return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \ - } -VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION) -#undef VIXL_CPUREG_NEON_COERCION_LIST -#undef VIXL_DEFINE_CPUREG_NEON_COERCION - -// Semantic type coersion for sdot and udot. 
-// TODO: Use the qualifiers_ field to distinguish this from ::S(). -VRegister VRegister::S4B() const { - VIXL_ASSERT(IsVRegister()); - return SRegister(GetCode()); -} - -bool AreAliased(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3, - const CPURegister& reg4, - const CPURegister& reg5, - const CPURegister& reg6, - const CPURegister& reg7, - const CPURegister& reg8) { - int number_of_valid_regs = 0; - int number_of_valid_vregs = 0; - int number_of_valid_pregs = 0; - - RegList unique_regs = 0; - RegList unique_vregs = 0; - RegList unique_pregs = 0; - - const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8}; - - for (size_t i = 0; i < ArrayLength(regs); i++) { - switch (regs[i].GetBank()) { - case CPURegister::kRRegisterBank: - number_of_valid_regs++; - unique_regs |= regs[i].GetBit(); - break; - case CPURegister::kVRegisterBank: - number_of_valid_vregs++; - unique_vregs |= regs[i].GetBit(); - break; - case CPURegister::kPRegisterBank: - number_of_valid_pregs++; - unique_pregs |= regs[i].GetBit(); - break; - case CPURegister::kNoRegisterBank: - VIXL_ASSERT(regs[i].IsNone()); - break; - } - } - - int number_of_unique_regs = CountSetBits(unique_regs); - int number_of_unique_vregs = CountSetBits(unique_vregs); - int number_of_unique_pregs = CountSetBits(unique_pregs); - - VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs); - VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs); - VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs); - - return (number_of_valid_regs != number_of_unique_regs) || - (number_of_valid_vregs != number_of_unique_vregs) || - (number_of_valid_pregs != number_of_unique_pregs); -} - -bool AreSameSizeAndType(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3, - const CPURegister& reg4, - const CPURegister& reg5, - const CPURegister& reg6, - const CPURegister& reg7, - const CPURegister& reg8) { - VIXL_ASSERT(reg1.IsValid()); - bool match = true; - 
match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1); - match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1); - match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1); - match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1); - match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1); - match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1); - match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1); - return match; -} - -bool AreEven(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3, - const CPURegister& reg4, - const CPURegister& reg5, - const CPURegister& reg6, - const CPURegister& reg7, - const CPURegister& reg8) { - VIXL_ASSERT(reg1.IsValid()); - bool even = (reg1.GetCode() % 2) == 0; - even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0); - even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0); - even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0); - even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0); - even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0); - even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0); - even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0); - return even; -} - -bool AreConsecutive(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3, - const CPURegister& reg4) { - VIXL_ASSERT(reg1.IsValid()); - - if (!reg2.IsValid()) { - return true; - } else if (reg2.GetCode() != - ((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) { - return false; - } - - if (!reg3.IsValid()) { - return true; - } else if (reg3.GetCode() != - ((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) { - return false; - } - - if (!reg4.IsValid()) { - return true; - } else if (reg4.GetCode() != - ((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) { - return false; - } - - return true; -} - -bool AreSameFormat(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3, - const CPURegister& reg4) { - VIXL_ASSERT(reg1.IsValid()); - bool match = true; - match &= !reg2.IsValid() || 
reg2.IsSameFormat(reg1); - match &= !reg3.IsValid() || reg3.IsSameFormat(reg1); - match &= !reg4.IsValid() || reg4.IsSameFormat(reg1); - return match; -} - -bool AreSameLaneSize(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3, - const CPURegister& reg4) { - VIXL_ASSERT(reg1.IsValid()); - bool match = true; - match &= - !reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits()); - match &= - !reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits()); - match &= - !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits()); - return match; -} -} -} // namespace vixl::aarch64 diff --git a/src/aarch64/registers-aarch64.h b/src/aarch64/registers-aarch64.h deleted file mode 100644 index 911974a8..00000000 --- a/src/aarch64/registers-aarch64.h +++ /dev/null @@ -1,900 +0,0 @@ -// Copyright 2019, VIXL authors -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of ARM Limited nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_ -#define VIXL_AARCH64_REGISTERS_AARCH64_H_ - -#include <string> - -#include "instructions-aarch64.h" - -namespace vixl { -namespace aarch64 { - -// An integer type capable of representing a homogeneous, non-overlapping set of -// registers as a bitmask of their codes. -typedef uint64_t RegList; -static const int kRegListSizeInBits = sizeof(RegList) * 8; - -class Register; -class WRegister; -class XRegister; - -class VRegister; -class BRegister; -class HRegister; -class SRegister; -class DRegister; -class QRegister; - -class ZRegister; - -class PRegister; -class PRegisterWithLaneSize; -class PRegisterM; -class PRegisterZ; - -// A container for any single register supported by the processor. Selected -// qualifications are also supported. Basic registers can be constructed -// directly as CPURegister objects. Other variants should be constructed as one -// of the derived classes. -// -// CPURegister aims to support any getter that would also be available to more -// specialised register types. However, using the equivalent functions on the -// specialised register types can avoid run-time checks, and should therefore be -// preferred where run-time polymorphism isn't required. -// -// Type-specific modifers are typically implemented only on the derived classes. -// -// The encoding is such that CPURegister objects are cheap to pass by value. 
-class CPURegister { - public: - enum RegisterBank : uint8_t { - kNoRegisterBank = 0, - kRRegisterBank, - kVRegisterBank, - kPRegisterBank - }; - enum RegisterType { - kNoRegister, - kRegister, - kVRegister, - kZRegister, - kPRegister - }; - - static const unsigned kUnknownSize = 0; - - VIXL_CONSTEXPR CPURegister() - : code_(0), - bank_(kNoRegisterBank), - size_(kEncodedUnknownSize), - qualifiers_(kNoQualifiers), - lane_size_(kEncodedUnknownSize) {} - - CPURegister(int code, int size_in_bits, RegisterType type) - : code_(code), - bank_(GetBankFor(type)), - size_(EncodeSizeInBits(size_in_bits)), - qualifiers_(kNoQualifiers), - lane_size_(EncodeSizeInBits(size_in_bits)) { - VIXL_ASSERT(IsValid()); - } - - // Basic accessors. - - // TODO: Make this return 'int'. - unsigned GetCode() const { return code_; } - - RegisterBank GetBank() const { return bank_; } - - // For scalar registers, the lane size matches the register size, and is - // always known. - bool HasSize() const { return size_ != kEncodedUnknownSize; } - bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; } - - RegList GetBit() const { - if (IsNone()) return 0; - VIXL_ASSERT(code_ < kRegListSizeInBits); - return static_cast<RegList>(1) << code_; - } - - // Return the architectural name for this register. - // TODO: This is temporary. Ultimately, we should move the - // Simulator::*RegNameForCode helpers out of the simulator, and provide an - // independent way to obtain the name of a register. - std::string GetArchitecturalName() const; - - // Return the highest valid register code for this type, to allow generic - // loops to be written. This excludes kSPRegInternalCode, since it is not - // contiguous, and sp usually requires special handling anyway. - unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); } - - // Registers without a known size report kUnknownSize. 
- int GetSizeInBits() const { return DecodeSizeInBits(size_); } - int GetSizeInBytes() const { return DecodeSizeInBytes(size_); } - // TODO: Make these return 'int'. - unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); } - unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); } - unsigned GetLaneSizeInBytesLog2() const { - VIXL_ASSERT(HasLaneSize()); - return DecodeSizeInBytesLog2(lane_size_); - } - - int GetLanes() const { - if (HasSize() && HasLaneSize()) { - // Take advantage of the size encoding to calculate this efficiently. - VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1)); - VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1)); - VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1)); - VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1)); - int log2_delta = static_cast<int>(size_) - static_cast<int>(lane_size_); - VIXL_ASSERT(log2_delta >= 0); - return 1 << log2_delta; - } - return kUnknownSize; - } - - bool Is8Bits() const { return size_ == kEncodedBRegSize; } - bool Is16Bits() const { return size_ == kEncodedHRegSize; } - bool Is32Bits() const { return size_ == kEncodedSRegSize; } - bool Is64Bits() const { return size_ == kEncodedDRegSize; } - bool Is128Bits() const { return size_ == kEncodedQRegSize; } - - bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; } - bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; } - bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; } - bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; } - bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; } - - // If Is<Foo>Register(), then it is valid to convert the CPURegister to some - // <Foo>Register<Bar> type. - // - // If... ... then it is safe to construct ... 
- // r.IsRegister() -> Register(r) - // r.IsVRegister() -> VRegister(r) - // r.IsZRegister() -> ZRegister(r) - // r.IsPRegister() -> PRegister(r) - // - // r.IsPRegister() && HasLaneSize() -> PRegisterWithLaneSize(r) - // r.IsPRegister() && IsMerging() -> PRegisterM(r) - // r.IsPRegister() && IsZeroing() -> PRegisterZ(r) - bool IsRegister() const { return GetType() == kRegister; } - bool IsVRegister() const { return GetType() == kVRegister; } - bool IsZRegister() const { return GetType() == kZRegister; } - bool IsPRegister() const { return GetType() == kPRegister; } - - bool IsNone() const { return GetType() == kNoRegister; } - - // `GetType() == kNoRegister` implies IsNone(), and vice-versa. - // `GetType() == k<Foo>Register` implies Is<Foo>Register(), and vice-versa. - RegisterType GetType() const { - switch (bank_) { - case kNoRegisterBank: - return kNoRegister; - case kRRegisterBank: - return kRegister; - case kVRegisterBank: - return HasSize() ? kVRegister : kZRegister; - case kPRegisterBank: - return kPRegister; - } - VIXL_UNREACHABLE(); - return kNoRegister; - } - - // IsFPRegister() is true for scalar FP types (and therefore implies - // IsVRegister()). There is no corresponding FPRegister type. - bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); } - - // TODO: These are stricter forms of the helpers above. We should make the - // basic helpers strict, and remove these. 
- bool IsValidRegister() const; - bool IsValidVRegister() const; - bool IsValidFPRegister() const; - bool IsValidZRegister() const; - bool IsValidPRegister() const; - - bool IsValid() const; - bool IsValidOrNone() const { return IsNone() || IsValid(); } - - bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); } - bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); } - - bool IsSameType(const CPURegister& other) const { - return GetType() == other.GetType(); - } - - bool IsSameBank(const CPURegister& other) const { - return GetBank() == other.GetBank(); - } - - // Two registers with unknown size are considered to have the same size if - // they also have the same type. For example, all Z registers have the same - // size, even though we don't know what that is. - bool IsSameSizeAndType(const CPURegister& other) const { - return IsSameType(other) && (size_ == other.size_); - } - - bool IsSameFormat(const CPURegister& other) const { - return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_); - } - - // Note that NoReg aliases itself, so that 'Is' implies 'Aliases'. - bool Aliases(const CPURegister& other) const { - return IsSameBank(other) && (code_ == other.code_); - } - - bool Is(const CPURegister& other) const { - if (IsRegister() || IsVRegister()) { - // For core (W, X) and FP/NEON registers, we only consider the code, size - // and type. This is legacy behaviour. - // TODO: We should probably check every field for all registers. - return Aliases(other) && (size_ == other.size_); - } else { - // For Z and P registers, we require all fields to match exactly. - VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister()); - return (code_ == other.code_) && (bank_ == other.bank_) && - (size_ == other.size_) && (qualifiers_ == other.qualifiers_) && - (lane_size_ == other.lane_size_); - } - } - - // Conversions to specific register types. The result is a register that - // aliases the original CPURegister. 
That is, the original register bank - // (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all - // other properties are ignored. - // - // Typical usage: - // - // if (reg.GetBank() == kVRegisterBank) { - // DRegister d = reg.D(); - // ... - // } - // - // These could all return types with compile-time guarantees (like XRegister), - // but this breaks backwards-compatibility quite severely, particularly with - // code like `cond ? reg.W() : reg.X()`, which would have indeterminate type. - - // Core registers, like "w0". - Register W() const; - Register X() const; - // FP/NEON registers, like "b0". - VRegister B() const; - VRegister H() const; - VRegister S() const; - VRegister D() const; - VRegister Q() const; - VRegister V() const; - // SVE registers, like "z0". - ZRegister Z() const; - PRegister P() const; - - // Utilities for kRegister types. - - bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); } - bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); } - bool IsW() const { return IsRegister() && Is32Bits(); } - bool IsX() const { return IsRegister() && Is64Bits(); } - - // Utilities for FP/NEON kVRegister types. - - // These helpers ensure that the size and type of the register are as - // described. They do not consider the number of lanes that make up a vector. - // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD() - // does not imply Is1D() or Is8B(). - // Check the number of lanes, ie. the format of the vector, using methods such - // as Is8B(), Is1D(), etc. - bool IsB() const { return IsVRegister() && Is8Bits(); } - bool IsH() const { return IsVRegister() && Is16Bits(); } - bool IsS() const { return IsVRegister() && Is32Bits(); } - bool IsD() const { return IsVRegister() && Is64Bits(); } - bool IsQ() const { return IsVRegister() && Is128Bits(); } - - // As above, but also check that the register has exactly one lane. 
For - // example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does - // not. - bool Is1B() const { return IsB() && IsScalar(); } - bool Is1H() const { return IsH() && IsScalar(); } - bool Is1S() const { return IsS() && IsScalar(); } - bool Is1D() const { return IsD() && IsScalar(); } - bool Is1Q() const { return IsQ() && IsScalar(); } - - // Check the specific NEON format. - bool Is8B() const { return IsD() && IsLaneSizeB(); } - bool Is16B() const { return IsQ() && IsLaneSizeB(); } - bool Is2H() const { return IsS() && IsLaneSizeH(); } - bool Is4H() const { return IsD() && IsLaneSizeH(); } - bool Is8H() const { return IsQ() && IsLaneSizeH(); } - bool Is2S() const { return IsD() && IsLaneSizeS(); } - bool Is4S() const { return IsQ() && IsLaneSizeS(); } - bool Is2D() const { return IsQ() && IsLaneSizeD(); } - - // A semantic alias for sdot and udot (indexed and by element) instructions. - // The current CPURegister implementation cannot not tell this from Is1S(), - // but it might do later. - // TODO: Do this with the qualifiers_ field. - bool Is1S4B() const { return Is1S(); } - - // Utilities for SVE registers. - - bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; } - bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); } - bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); } - - // SVE types have unknown sizes, but within known bounds. 
- - int GetMaxSizeInBytes() const { - switch (GetType()) { - case kZRegister: - return kZRegMaxSizeInBytes; - case kPRegister: - return kPRegMaxSizeInBytes; - default: - VIXL_ASSERT(HasSize()); - return GetSizeInBits(); - } - } - - int GetMinSizeInBytes() const { - switch (GetType()) { - case kZRegister: - return kZRegMinSizeInBytes; - case kPRegister: - return kPRegMinSizeInBytes; - default: - VIXL_ASSERT(HasSize()); - return GetSizeInBits(); - } - } - - int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; } - int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; } - - static RegisterBank GetBankFor(RegisterType type) { - switch (type) { - case kNoRegister: - return kNoRegisterBank; - case kRegister: - return kRRegisterBank; - case kVRegister: - case kZRegister: - return kVRegisterBank; - case kPRegister: - return kPRegisterBank; - } - VIXL_UNREACHABLE(); - return kNoRegisterBank; - } - - static unsigned GetMaxCodeFor(CPURegister::RegisterType type) { - return GetMaxCodeFor(GetBankFor(type)); - } - - protected: - enum EncodedSize : uint8_t { - // Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero. - kEncodedUnknownSize = 0, - - // The implementation assumes that the remaining sizes are encoded as - // `log2(size) + c`, so the following names must remain in sequence. 
- kEncodedBRegSize, - kEncodedHRegSize, - kEncodedSRegSize, - kEncodedDRegSize, - kEncodedQRegSize, - - kEncodedWRegSize = kEncodedSRegSize, - kEncodedXRegSize = kEncodedDRegSize - }; - VIXL_STATIC_ASSERT(kSRegSize == kWRegSize); - VIXL_STATIC_ASSERT(kDRegSize == kXRegSize); - - char GetLaneSizeSymbol() const { - switch (lane_size_) { - case kEncodedBRegSize: - return 'B'; - case kEncodedHRegSize: - return 'H'; - case kEncodedSRegSize: - return 'S'; - case kEncodedDRegSize: - return 'D'; - case kEncodedQRegSize: - return 'Q'; - case kEncodedUnknownSize: - break; - } - VIXL_UNREACHABLE(); - return '?'; - } - - static EncodedSize EncodeSizeInBits(int size_in_bits) { - switch (size_in_bits) { - case kUnknownSize: - return kEncodedUnknownSize; - case kBRegSize: - return kEncodedBRegSize; - case kHRegSize: - return kEncodedHRegSize; - case kSRegSize: - return kEncodedSRegSize; - case kDRegSize: - return kEncodedDRegSize; - case kQRegSize: - return kEncodedQRegSize; - } - VIXL_UNREACHABLE(); - return kEncodedUnknownSize; - } - - static int DecodeSizeInBytesLog2(EncodedSize encoded_size) { - switch (encoded_size) { - case kEncodedUnknownSize: - // Log2 of B-sized lane in bytes is 0, so we can't just return 0 here. 
- VIXL_UNREACHABLE(); - return -1; - case kEncodedBRegSize: - return kBRegSizeInBytesLog2; - case kEncodedHRegSize: - return kHRegSizeInBytesLog2; - case kEncodedSRegSize: - return kSRegSizeInBytesLog2; - case kEncodedDRegSize: - return kDRegSizeInBytesLog2; - case kEncodedQRegSize: - return kQRegSizeInBytesLog2; - } - VIXL_UNREACHABLE(); - return kUnknownSize; - } - - static int DecodeSizeInBytes(EncodedSize encoded_size) { - if (encoded_size == kEncodedUnknownSize) { - return kUnknownSize; - } - return 1 << DecodeSizeInBytesLog2(encoded_size); - } - - static int DecodeSizeInBits(EncodedSize encoded_size) { - VIXL_STATIC_ASSERT(kUnknownSize == 0); - return DecodeSizeInBytes(encoded_size) * kBitsPerByte; - } - - static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank); - - enum Qualifiers : uint8_t { - kNoQualifiers = 0, - // Used by P registers. - kMerging, - kZeroing - }; - - // An unchecked constructor, for use by derived classes. - CPURegister(int code, - EncodedSize size, - RegisterBank bank, - EncodedSize lane_size, - Qualifiers qualifiers = kNoQualifiers) - : code_(code), - bank_(bank), - size_(size), - qualifiers_(qualifiers), - lane_size_(lane_size) {} - - // TODO: Check that access to these fields is reasonably efficient. - uint8_t code_; - RegisterBank bank_; - EncodedSize size_; - Qualifiers qualifiers_; - EncodedSize lane_size_; -}; -// Ensure that CPURegisters can fit in a single (64-bit) register. This is a -// proxy for being "cheap to pass by value", which is hard to check directly. -VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t)); - -// TODO: Add constexpr constructors. 
-#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \ - VIXL_CONSTEXPR NAME() : PARENT_TYPE() {} \ - \ - explicit NAME(CPURegister other) : PARENT_TYPE(other) { \ - VIXL_ASSERT(IsValid()); \ - } \ - \ - VIXL_CONSTEXPR static unsigned GetMaxCode() { \ - return kNumberOf##REGISTER_TYPE##s - 1; \ - } - -// Any W or X register, including the zero register and the stack pointer. -class Register : public CPURegister { - public: - VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister) - - Register(int code, int size_in_bits) - : CPURegister(code, size_in_bits, kRegister) { - VIXL_ASSERT(IsValidRegister()); - } - - bool IsValid() const { return IsValidRegister(); } -}; - -// Any FP or NEON V register, including vector (V.<T>) and scalar forms -// (B, H, S, D, Q). -class VRegister : public CPURegister { - public: - VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister) - - // For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0). - explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1) - : CPURegister(code, - EncodeSizeInBits(size_in_bits), - kVRegisterBank, - EncodeLaneSizeInBits(size_in_bits, lanes)) { - VIXL_ASSERT(IsValidVRegister()); - } - - VRegister(int code, VectorFormat format) - : CPURegister(code, - EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)), - kVRegisterBank, - EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), - kNoQualifiers) { - VIXL_ASSERT(IsValid()); - } - - VRegister V8B() const; - VRegister V16B() const; - VRegister V2H() const; - VRegister V4H() const; - VRegister V8H() const; - VRegister V2S() const; - VRegister V4S() const; - VRegister V1D() const; - VRegister V2D() const; - VRegister S4B() const; - - bool IsValid() const { return IsValidVRegister(); } - - protected: - static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) { - VIXL_ASSERT(lanes >= 1); - VIXL_ASSERT((size_in_bits % lanes) == 0); - return EncodeSizeInBits(size_in_bits / lanes); - } -}; - 
-// Any SVE Z register, with or without a lane size specifier. -class ZRegister : public CPURegister { - public: - VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister) - - explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize) - : CPURegister(code, - kEncodedUnknownSize, - kVRegisterBank, - EncodeSizeInBits(lane_size_in_bits)) { - VIXL_ASSERT(IsValid()); - } - - ZRegister(int code, VectorFormat format) - : CPURegister(code, - kEncodedUnknownSize, - kVRegisterBank, - EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), - kNoQualifiers) { - VIXL_ASSERT(IsValid()); - } - - // Return a Z register with a known lane size (like "z0.B"). - ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); } - ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); } - ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); } - ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); } - ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); } - - template <typename T> - ZRegister WithLaneSize(T format) const { - return ZRegister(GetCode(), format); - } - - ZRegister WithSameLaneSizeAs(const CPURegister& other) const { - VIXL_ASSERT(other.HasLaneSize()); - return this->WithLaneSize(other.GetLaneSizeInBits()); - } - - bool IsValid() const { return IsValidZRegister(); } -}; - -// Any SVE P register, with or without a qualifier or lane size specifier. -class PRegister : public CPURegister { - public: - VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister) - - explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) { - VIXL_ASSERT(IsValid()); - } - - bool IsValid() const { - return IsValidPRegister() && !HasLaneSize() && IsUnqualified(); - } - - // Return a P register with a known lane size (like "p0.B"). 
- PRegisterWithLaneSize VnB() const; - PRegisterWithLaneSize VnH() const; - PRegisterWithLaneSize VnS() const; - PRegisterWithLaneSize VnD() const; - - template <typename T> - PRegisterWithLaneSize WithLaneSize(T format) const; - - PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const; - - // SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or - // "/m" (merging) suffix. These methods are VIXL's equivalents. - PRegisterZ Zeroing() const; - PRegisterM Merging() const; - - protected: - // Unchecked constructors, for use by derived classes. - PRegister(int code, EncodedSize encoded_lane_size) - : CPURegister(code, - kEncodedUnknownSize, - kPRegisterBank, - encoded_lane_size, - kNoQualifiers) {} - - PRegister(int code, Qualifiers qualifiers) - : CPURegister(code, - kEncodedUnknownSize, - kPRegisterBank, - kEncodedUnknownSize, - qualifiers) {} -}; - -// Any SVE P register with a known lane size (like "p0.B"). -class PRegisterWithLaneSize : public PRegister { - public: - VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister) - - PRegisterWithLaneSize(int code, int lane_size_in_bits) - : PRegister(code, EncodeSizeInBits(lane_size_in_bits)) { - VIXL_ASSERT(IsValid()); - } - - PRegisterWithLaneSize(int code, VectorFormat format) - : PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) { - VIXL_ASSERT(IsValid()); - } - - bool IsValid() const { - return IsValidPRegister() && HasLaneSize() && IsUnqualified(); - } - - // Overload lane size accessors so we can assert `HasLaneSize()`. This allows - // tools such as clang-tidy to prove that the result of GetLaneSize* is - // non-zero. - - // TODO: Make these return 'int'. 
- unsigned GetLaneSizeInBits() const { - VIXL_ASSERT(HasLaneSize()); - return PRegister::GetLaneSizeInBits(); - } - - unsigned GetLaneSizeInBytes() const { - VIXL_ASSERT(HasLaneSize()); - return PRegister::GetLaneSizeInBytes(); - } -}; - -// Any SVE P register with the zeroing qualifier (like "p0/z"). -class PRegisterZ : public PRegister { - public: - VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister) - - explicit PRegisterZ(int code) : PRegister(code, kZeroing) { - VIXL_ASSERT(IsValid()); - } - - bool IsValid() const { - return IsValidPRegister() && !HasLaneSize() && IsZeroing(); - } -}; - -// Any SVE P register with the merging qualifier (like "p0/m"). -class PRegisterM : public PRegister { - public: - VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister) - - explicit PRegisterM(int code) : PRegister(code, kMerging) { - VIXL_ASSERT(IsValid()); - } - - bool IsValid() const { - return IsValidPRegister() && !HasLaneSize() && IsMerging(); - } -}; - -inline PRegisterWithLaneSize PRegister::VnB() const { - return PRegisterWithLaneSize(GetCode(), kBRegSize); -} -inline PRegisterWithLaneSize PRegister::VnH() const { - return PRegisterWithLaneSize(GetCode(), kHRegSize); -} -inline PRegisterWithLaneSize PRegister::VnS() const { - return PRegisterWithLaneSize(GetCode(), kSRegSize); -} -inline PRegisterWithLaneSize PRegister::VnD() const { - return PRegisterWithLaneSize(GetCode(), kDRegSize); -} - -template <typename T> -inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const { - return PRegisterWithLaneSize(GetCode(), format); -} - -inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs( - const CPURegister& other) const { - VIXL_ASSERT(other.HasLaneSize()); - return this->WithLaneSize(other.GetLaneSizeInBits()); -} - -inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); } -inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); } - -#define VIXL_REGISTER_WITH_SIZE_LIST(V) \ - V(WRegister, 
kWRegSize, Register) \ - V(XRegister, kXRegSize, Register) \ - V(QRegister, kQRegSize, VRegister) \ - V(DRegister, kDRegSize, VRegister) \ - V(SRegister, kSRegSize, VRegister) \ - V(HRegister, kHRegSize, VRegister) \ - V(BRegister, kBRegSize, VRegister) - -#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT) \ - class NAME : public PARENT { \ - public: \ - VIXL_CONSTEXPR NAME() : PARENT() {} \ - explicit NAME(int code) : PARENT(code, SIZE) {} \ - \ - explicit NAME(PARENT other) : PARENT(other) { \ - VIXL_ASSERT(GetSizeInBits() == SIZE); \ - } \ - \ - PARENT As##PARENT() const { return *this; } \ - \ - VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; } \ - \ - bool IsValid() const { \ - return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \ - } \ - }; - -VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE) - -// No*Reg is used to provide default values for unused arguments, error cases -// and so on. Note that these (and the default constructors) all compare equal -// (using the Is() method). -const Register NoReg; -const VRegister NoVReg; -const CPURegister NoCPUReg; -const ZRegister NoZReg; - -// TODO: Ideally, these would use specialised register types (like XRegister and -// so on). However, doing so throws up template overloading problems elsewhere. -#define VIXL_DEFINE_REGISTERS(N) \ - const Register w##N = WRegister(N); \ - const Register x##N = XRegister(N); \ - const VRegister b##N = BRegister(N); \ - const VRegister h##N = HRegister(N); \ - const VRegister s##N = SRegister(N); \ - const VRegister d##N = DRegister(N); \ - const VRegister q##N = QRegister(N); \ - const VRegister v##N(N); \ - const ZRegister z##N(N); -AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS) -#undef VIXL_DEFINE_REGISTERS - -#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N); -AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS) -#undef VIXL_DEFINE_P_REGISTERS - -// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'. 
-const Register wsp = WRegister(kSPRegInternalCode); -const Register sp = XRegister(kSPRegInternalCode); - -// Standard aliases. -const Register ip0 = x16; -const Register ip1 = x17; -const Register lr = x30; -const Register xzr = x31; -const Register wzr = w31; - -// AreAliased returns true if any of the named registers overlap. Arguments -// set to NoReg are ignored. The system stack pointer may be specified. -bool AreAliased(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoReg, - const CPURegister& reg4 = NoReg, - const CPURegister& reg5 = NoReg, - const CPURegister& reg6 = NoReg, - const CPURegister& reg7 = NoReg, - const CPURegister& reg8 = NoReg); - -// AreSameSizeAndType returns true if all of the specified registers have the -// same size, and are of the same type. The system stack pointer may be -// specified. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoCPUReg). -bool AreSameSizeAndType(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg, - const CPURegister& reg5 = NoCPUReg, - const CPURegister& reg6 = NoCPUReg, - const CPURegister& reg7 = NoCPUReg, - const CPURegister& reg8 = NoCPUReg); - -// AreEven returns true if all of the specified registers have even register -// indices. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoCPUReg). -bool AreEven(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoReg, - const CPURegister& reg4 = NoReg, - const CPURegister& reg5 = NoReg, - const CPURegister& reg6 = NoReg, - const CPURegister& reg7 = NoReg, - const CPURegister& reg8 = NoReg); - -// AreConsecutive returns true if all of the specified registers are -// consecutive in the register file. Arguments set to NoReg are ignored, as are -// any subsequent arguments. 
At least one argument (reg1) must be valid -// (not NoCPUReg). -bool AreConsecutive(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg); - -// AreSameFormat returns true if all of the specified registers have the same -// vector format. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoVReg). -bool AreSameFormat(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg); - -// AreSameLaneSize returns true if all of the specified registers have the same -// element lane size, B, H, S or D. It doesn't compare the type of registers. -// Arguments set to NoReg are ignored, as are any subsequent arguments. -// At least one argument (reg1) must be valid (not NoVReg). -// TODO: Remove this, and replace its uses with AreSameFormat. -bool AreSameLaneSize(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg); -} -} // namespace vixl::aarch64 - -#endif // VIXL_AARCH64_REGISTERS_AARCH64_H_ diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index 6d6d1677..855a2971 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -26,9 +26,6 @@ #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 -#include <errno.h> -#include <unistd.h> - #include <cmath> #include <cstring> #include <limits> @@ -68,13 +65,12 @@ SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) { Simulator::Simulator(Decoder* decoder, FILE* stream) - : movprfx_(NULL), cpu_features_auditor_(decoder, CPUFeatures::All()) { + : cpu_features_auditor_(decoder, CPUFeatures::All()) { // Ensure that shift operations act as the simulator expects. 
VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1); VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff); - // Set up a dummy pipe for CanReadMemory. - VIXL_CHECK(pipe(dummy_pipe_fd_) == 0); + instruction_stats_ = false; // Set up the decoder. decoder_ = decoder; @@ -95,10 +91,6 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) SetColouredTrace(false); trace_parameters_ = LOG_NONE; - // We have to configure the SVE vector register length before calling - // ResetState(). - SetVectorLengthInBits(kZRegMinSize); - ResetState(); // Allocate and set up the simulator stack. @@ -113,6 +105,8 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) tos = AlignDown(tos, 16); WriteSp(tos); + instrumentation_ = NULL; + // Print a warning about exclusive-access instructions, but only the first // time they are encountered. This warning can be silenced using // SilenceExclusiveAccessWarning(). @@ -122,111 +116,52 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) // Initialize the common state of RNDR and RNDRRS. uint16_t seed[3] = {11, 22, 33}; - VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_)); - memcpy(rand_state_, seed, sizeof(rand_state_)); - - // Initialize all bits of pseudo predicate register to true. - LogicPRegister ones(pregister_all_true_); - ones.SetAllBits(); + VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rndr_state_)); + memcpy(rndr_state_, seed, sizeof(rndr_state_)); } -void Simulator::ResetSystemRegisters() { + +void Simulator::ResetState() { // Reset the system registers. nzcv_ = SimSystemRegister::DefaultValueFor(NZCV); fpcr_ = SimSystemRegister::DefaultValueFor(FPCR); - ResetFFR(); -} -void Simulator::ResetRegisters() { + // Reset registers to 0. + pc_ = NULL; + pc_modified_ = false; for (unsigned i = 0; i < kNumberOfRegisters; i++) { WriteXRegister(i, 0xbadbeef); } - // Returning to address 0 exits the Simulator. - WriteLr(kEndOfSimAddress); -} + // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP. 
+ uint64_t nan_bits[] = { + UINT64_C(0x7ff00cab7f8ba9e1), UINT64_C(0x7ff0dead7f8beef1), + }; + VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits[0] & kDRegMask))); + VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits[0] & kSRegMask))); -void Simulator::ResetVRegisters() { - // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP. - VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0); - int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes; - for (unsigned i = 0; i < kNumberOfZRegisters; i++) { - VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes()); - vregisters_[i].NotifyAccessAsZ(); - for (int lane = 0; lane < lane_count; lane++) { - // Encode the register number and (D-sized) lane into each NaN, to - // make them easier to trace. - uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) | - (0x0000000000000001 * lane); - VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask))); - VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask))); - vregisters_[i].Insert(lane, nan_bits); - } - } -} + qreg_t q_bits; + VIXL_ASSERT(sizeof(q_bits) == sizeof(nan_bits)); + memcpy(&q_bits, nan_bits, sizeof(nan_bits)); -void Simulator::ResetPRegisters() { - VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0); - int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes; - // Ensure the register configuration fits in this bit encoding. - VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX); - VIXL_ASSERT(lane_count <= UINT8_MAX); - for (unsigned i = 0; i < kNumberOfPRegisters; i++) { - VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes()); - for (int lane = 0; lane < lane_count; lane++) { - // Encode the register number and (H-sized) lane into each lane slot. 
- uint16_t bits = (0x0100 * lane) | i; - pregisters_[i].Insert(lane, bits); - } + for (unsigned i = 0; i < kNumberOfVRegisters; i++) { + WriteQRegister(i, q_bits); } -} - -void Simulator::ResetFFR() { - VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0); - int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes; - ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes))); -} - -void Simulator::ResetState() { - ResetSystemRegisters(); - ResetRegisters(); - ResetVRegisters(); - ResetPRegisters(); - - pc_ = NULL; - pc_modified_ = false; + // Returning to address 0 exits the Simulator. + WriteLr(kEndOfSimAddress); - // BTI state. btype_ = DefaultBType; next_btype_ = DefaultBType; } -void Simulator::SetVectorLengthInBits(unsigned vector_length) { - VIXL_ASSERT((vector_length >= kZRegMinSize) && - (vector_length <= kZRegMaxSize)); - VIXL_ASSERT((vector_length % kZRegMinSize) == 0); - vector_length_ = vector_length; - - for (unsigned i = 0; i < kNumberOfZRegisters; i++) { - vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes()); - } - for (unsigned i = 0; i < kNumberOfPRegisters; i++) { - pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes()); - } - - ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes()); - - ResetVRegisters(); - ResetPRegisters(); - ResetFFR(); -} Simulator::~Simulator() { delete[] stack_; // The decoder may outlive the simulator. 
decoder_->RemoveVisitor(print_disasm_); delete print_disasm_; - close(dummy_pipe_fd_[0]); - close(dummy_pipe_fd_[1]); + + decoder_->RemoveVisitor(instrumentation_); + delete instrumentation_; } @@ -247,7 +182,6 @@ void Simulator::RunFrom(const Instruction* first) { } -// clang-format off const char* Simulator::xreg_names[] = {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", @@ -262,13 +196,6 @@ const char* Simulator::wreg_names[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"}; -const char* Simulator::breg_names[] = {"b0", "b1", "b2", "b3", "b4", "b5", - "b6", "b7", "b8", "b9", "b10", "b11", - "b12", "b13", "b14", "b15", "b16", "b17", - "b18", "b19", "b20", "b21", "b22", "b23", - "b24", "b25", "b26", "b27", "b28", "b29", - "b30", "b31"}; - const char* Simulator::hreg_names[] = {"h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", "h16", "h17", @@ -297,47 +224,27 @@ const char* Simulator::vreg_names[] = {"v0", "v1", "v2", "v3", "v4", "v5", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"}; -const char* Simulator::zreg_names[] = {"z0", "z1", "z2", "z3", "z4", "z5", - "z6", "z7", "z8", "z9", "z10", "z11", - "z12", "z13", "z14", "z15", "z16", "z17", - "z18", "z19", "z20", "z21", "z22", "z23", - "z24", "z25", "z26", "z27", "z28", "z29", - "z30", "z31"}; - -const char* Simulator::preg_names[] = {"p0", "p1", "p2", "p3", "p4", "p5", - "p6", "p7", "p8", "p9", "p10", "p11", - "p12", "p13", "p14", "p15"}; -// clang-format on - const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) { + VIXL_ASSERT(code < kNumberOfRegisters); // If the code represents the stack pointer, index the name after zr. 
- if ((code == kSPRegInternalCode) || - ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) { + if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { code = kZeroRegCode + 1; } - VIXL_ASSERT(code < ArrayLength(wreg_names)); return wreg_names[code]; } const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) { + VIXL_ASSERT(code < kNumberOfRegisters); // If the code represents the stack pointer, index the name after zr. - if ((code == kSPRegInternalCode) || - ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) { + if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { code = kZeroRegCode + 1; } - VIXL_ASSERT(code < ArrayLength(xreg_names)); return xreg_names[code]; } -const char* Simulator::BRegNameForCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return breg_names[code]; -} - - const char* Simulator::HRegNameForCode(unsigned code) { VIXL_ASSERT(code < kNumberOfVRegisters); return hreg_names[code]; @@ -362,39 +269,6 @@ const char* Simulator::VRegNameForCode(unsigned code) { } -const char* Simulator::ZRegNameForCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfZRegisters); - return zreg_names[code]; -} - - -const char* Simulator::PRegNameForCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfPRegisters); - return preg_names[code]; -} - -SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) { - SimVRegister ones, result; - dup_immediate(kFormatVnB, ones, 0xff); - mov_zeroing(kFormatVnB, result, pg, ones); - return result; -} - -void Simulator::ExtractFromSimVRegister(VectorFormat vform, - SimPRegister& pd, - SimVRegister vreg) { - SimVRegister zero; - dup_immediate(kFormatVnB, zero, 0); - SVEIntCompareVectorsHelper(ne, - vform, - pd, - GetPTrue(), - vreg, - zero, - false, - LeaveFlags); -} - #define COLOUR(colour_code) "\033[0;" colour_code "m" #define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m" #define COLOUR_HIGHLIGHT "\033[43m" @@ -417,8 +291,6 @@ void 
Simulator::SetColouredTrace(bool value) { clr_reg_value = value ? COLOUR(CYAN) : ""; clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : ""; clr_vreg_value = value ? COLOUR(MAGENTA) : ""; - clr_preg_name = value ? COLOUR_BOLD(GREEN) : ""; - clr_preg_value = value ? COLOUR(GREEN) : ""; clr_memory_address = value ? COLOUR_BOLD(BLUE) : ""; clr_warning = value ? COLOUR_BOLD(YELLOW) : ""; clr_warning_message = value ? COLOUR(YELLOW) : ""; @@ -450,6 +322,22 @@ void Simulator::SetTraceParameters(int parameters) { } +void Simulator::SetInstructionStats(bool value) { + if (value != instruction_stats_) { + if (value) { + if (instrumentation_ == NULL) { + // Set the sample period to 10, as the VIXL examples and tests are + // short. + instrumentation_ = new Instrument("vixl_stats.csv", 10); + } + decoder_->AppendVisitor(instrumentation_); + } else if (instrumentation_ != NULL) { + decoder_->RemoveVisitor(instrumentation_); + } + instruction_stats_ = value; + } +} + // Helpers --------------------------------------------------------------------- uint64_t Simulator::AddWithCarry(unsigned reg_size, bool set_flags, @@ -491,50 +379,44 @@ uint64_t Simulator::AddWithCarry(unsigned reg_size, int64_t Simulator::ShiftOperand(unsigned reg_size, - uint64_t uvalue, + int64_t value, Shift shift_type, unsigned amount) const { - VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) || - (reg_size == kSRegSize) || (reg_size == kDRegSize)); - if (amount > 0) { - uint64_t mask = GetUintMask(reg_size); - bool is_negative = (uvalue & GetSignMask(reg_size)) != 0; - // The behavior is undefined in c++ if the shift amount greater than or - // equal to the register lane size. Work out the shifted result based on - // architectural behavior before performing the c++ type shfit operations. 
- switch (shift_type) { - case LSL: - if (amount >= reg_size) { - return UINT64_C(0); - } - uvalue <<= amount; - break; - case LSR: - if (amount >= reg_size) { - return UINT64_C(0); - } - uvalue >>= amount; - break; - case ASR: - if (amount >= reg_size) { - return is_negative ? ~UINT64_C(0) : UINT64_C(0); - } - uvalue >>= amount; - if (is_negative) { - // Simulate sign-extension to 64 bits. - uvalue |= ~UINT64_C(0) << (reg_size - amount); - } - break; - case ROR: { - uvalue = RotateRight(uvalue, amount, reg_size); - break; + VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); + if (amount == 0) { + return value; + } + uint64_t uvalue = static_cast<uint64_t>(value); + uint64_t mask = kWRegMask; + bool is_negative = (uvalue & kWSignMask) != 0; + if (reg_size == kXRegSize) { + mask = kXRegMask; + is_negative = (uvalue & kXSignMask) != 0; + } + + switch (shift_type) { + case LSL: + uvalue <<= amount; + break; + case LSR: + uvalue >>= amount; + break; + case ASR: + uvalue >>= amount; + if (is_negative) { + // Simulate sign-extension to 64 bits. 
+ uvalue |= ~UINT64_C(0) << (reg_size - amount); } - default: - VIXL_UNIMPLEMENTED(); - return 0; + break; + case ROR: { + uvalue = RotateRight(uvalue, amount, reg_size); + break; } - uvalue &= mask; + default: + VIXL_UNIMPLEMENTED(); + return 0; } + uvalue &= mask; int64_t result; memcpy(&result, &uvalue, sizeof(result)); @@ -710,15 +592,6 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat( return kPrintReg1S; case kFormatD: return kPrintReg1D; - - case kFormatVnB: - return kPrintRegVnB; - case kFormatVnH: - return kPrintRegVnH; - case kFormatVnS: - return kPrintRegVnS; - case kFormatVnD: - return kPrintRegVnD; } } @@ -750,445 +623,301 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP( } } -void Simulator::PrintRegisters() { + +void Simulator::PrintWrittenRegisters() { for (unsigned i = 0; i < kNumberOfRegisters; i++) { - if (i == kSpRegCode) i = kSPRegInternalCode; - PrintRegister(i); + if (registers_[i].WrittenSinceLastLog()) PrintRegister(i); } } -void Simulator::PrintVRegisters() { + +void Simulator::PrintWrittenVRegisters() { for (unsigned i = 0; i < kNumberOfVRegisters; i++) { - PrintVRegister(i); + // At this point there is no type information, so print as a raw 1Q. 
+ if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q); } } -void Simulator::PrintZRegisters() { - for (unsigned i = 0; i < kNumberOfZRegisters; i++) { - PrintZRegister(i); - } + +void Simulator::PrintSystemRegisters() { + PrintSystemRegister(NZCV); + PrintSystemRegister(FPCR); } -void Simulator::PrintWrittenRegisters() { + +void Simulator::PrintRegisters() { for (unsigned i = 0; i < kNumberOfRegisters; i++) { - if (registers_[i].WrittenSinceLastLog()) { - if (i == kSpRegCode) i = kSPRegInternalCode; - PrintRegister(i); - } + PrintRegister(i); } } -void Simulator::PrintWrittenVRegisters() { - bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE); + +void Simulator::PrintVRegisters() { for (unsigned i = 0; i < kNumberOfVRegisters; i++) { - if (vregisters_[i].WrittenSinceLastLog()) { - // Z registers are initialised in the constructor before the user can - // configure the CPU features, so we must also check for SVE here. - if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) { - PrintZRegister(i); - } else { - PrintVRegister(i); - } - } + // At this point there is no type information, so print as a raw 1Q. + PrintVRegister(i, kPrintReg1Q); } } -void Simulator::PrintWrittenPRegisters() { - // P registers are initialised in the constructor before the user can - // configure the CPU features, so we must check for SVE here. - if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return; - for (unsigned i = 0; i < kNumberOfPRegisters; i++) { - if (pregisters_[i].WrittenSinceLastLog()) { - PrintPRegister(i); - } + +// Print a register's name and raw value. +// +// Only the least-significant `size_in_bytes` bytes of the register are printed, +// but the value is aligned as if the whole register had been printed. +// +// For typical register updates, size_in_bytes should be set to kXRegSizeInBytes +// -- the default -- so that the whole register is printed. 
Other values of +// size_in_bytes are intended for use when the register hasn't actually been +// updated (such as in PrintWrite). +// +// No newline is printed. This allows the caller to print more details (such as +// a memory access annotation). +void Simulator::PrintRegisterRawHelper(unsigned code, + Reg31Mode r31mode, + int size_in_bytes) { + // The template for all supported sizes. + // "# x{code}: 0xffeeddccbbaa9988" + // "# w{code}: 0xbbaa9988" + // "# w{code}<15:0>: 0x9988" + // "# w{code}<7:0>: 0x88" + unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2; + + const char* name = ""; + const char* suffix = ""; + switch (size_in_bytes) { + case kXRegSizeInBytes: + name = XRegNameForCode(code, r31mode); + break; + case kWRegSizeInBytes: + name = WRegNameForCode(code, r31mode); + break; + case 2: + name = WRegNameForCode(code, r31mode); + suffix = "<15:0>"; + padding_chars -= strlen(suffix); + break; + case 1: + name = WRegNameForCode(code, r31mode); + suffix = "<7:0>"; + padding_chars -= strlen(suffix); + break; + default: + VIXL_UNREACHABLE(); + } + fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix); + + // Print leading padding spaces. + VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2)); + for (unsigned i = 0; i < padding_chars; i++) { + putc(' ', stream_); } - if (ReadFFR().WrittenSinceLastLog()) PrintFFR(); + + // Print the specified bits in hexadecimal format. + uint64_t bits = ReadRegister<uint64_t>(code, r31mode); + bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8); + VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes); + + int chars = size_in_bytes * 2; + fprintf(stream_, + "%s0x%0*" PRIx64 "%s", + clr_reg_value, + chars, + bits, + clr_normal); } -void Simulator::PrintSystemRegisters() { - PrintSystemRegister(NZCV); - PrintSystemRegister(FPCR); + +void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) { + registers_[code].NotifyRegisterLogged(); + + // Don't print writes into xzr. 
+ if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) { + return; + } + + // The template for all x and w registers: + // "# x{code}: 0x{value}" + // "# w{code}: 0x{value}" + + PrintRegisterRawHelper(code, r31mode); + fprintf(stream_, "\n"); } -void Simulator::PrintRegisterValue(const uint8_t* value, - int value_size, - PrintRegisterFormat format) { - int print_width = GetPrintRegSizeInBytes(format); - VIXL_ASSERT(print_width <= value_size); - for (int i = value_size - 1; i >= print_width; i--) { - // Pad with spaces so that values align vertically. + +// Print a register's name and raw value. +// +// The `bytes` and `lsb` arguments can be used to limit the bytes that are +// printed. These arguments are intended for use in cases where register hasn't +// actually been updated (such as in PrintVWrite). +// +// No newline is printed. This allows the caller to print more details (such as +// a floating-point interpretation or a memory access annotation). +void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) { + // The template for vector types: + // "# v{code}: 0xffeeddccbbaa99887766554433221100". + // An example with bytes=4 and lsb=8: + // "# v{code}: 0xbbaa9988 ". + fprintf(stream_, + "# %s%5s: %s", + clr_vreg_name, + VRegNameForCode(code), + clr_vreg_value); + + int msb = lsb + bytes - 1; + int byte = kQRegSizeInBytes - 1; + + // Print leading padding spaces. (Two spaces per byte.) + while (byte > msb) { fprintf(stream_, " "); - // If we aren't explicitly printing a partial value, ensure that the - // unprinted bits are zero. - VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0)); + byte--; } + + // Print the specified part of the value, byte by byte. 
+ qreg_t rawbits = ReadQRegister(code); fprintf(stream_, "0x"); - for (int i = print_width - 1; i >= 0; i--) { - fprintf(stream_, "%02x", value[i]); + while (byte >= lsb) { + fprintf(stream_, "%02x", rawbits.val[byte]); + byte--; } -} -void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value, - uint16_t lane_mask, - PrintRegisterFormat format) { - VIXL_ASSERT((format & kPrintRegAsFP) != 0); - int lane_size = GetPrintRegLaneSizeInBytes(format); - fprintf(stream_, " ("); - bool last_inactive = false; - const char* sep = ""; - for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") { - bool access = (lane_mask & (1 << (i * lane_size))) != 0; - if (access) { - // Read the lane as a double, so we can format all FP types in the same - // way. We squash NaNs, and a double can exactly represent any other value - // that the smaller types can represent, so this is lossless. - double element; - switch (lane_size) { - case kHRegSizeInBytes: { - Float16 element_fp16; - VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes); - memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16)); - element = FPToDouble(element_fp16, kUseDefaultNaN); - break; - } - case kSRegSizeInBytes: { - float element_fp32; - memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32)); - element = static_cast<double>(element_fp32); - break; - } - case kDRegSizeInBytes: { - memcpy(&element, &value[i * lane_size], sizeof(element)); - break; - } - default: - VIXL_UNREACHABLE(); - fprintf(stream_, "{UnknownFPValue}"); - continue; - } - if (IsNaN(element)) { - // The fprintf behaviour for NaNs is implementation-defined. Always - // print "nan", so that traces are consistent. 
- fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal); - } else { - fprintf(stream_, - "%s%s%#.4g%s", - sep, - clr_vreg_value, - element, - clr_normal); - } - last_inactive = false; - } else if (!last_inactive) { - // Replace each contiguous sequence of inactive lanes with "...". - fprintf(stream_, "%s...", sep); - last_inactive = true; - } + // Print trailing padding spaces. + while (byte >= 0) { + fprintf(stream_, " "); + byte--; } - fprintf(stream_, ")"); + fprintf(stream_, "%s", clr_normal); } -void Simulator::PrintRegister(int code, - PrintRegisterFormat format, - const char* suffix) { - VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) || - (static_cast<unsigned>(code) == kSPRegInternalCode)); - VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar); - VIXL_ASSERT((format & kPrintRegAsFP) == 0); - SimRegister* reg; - SimRegister zero; - if (code == kZeroRegCode) { - reg = &zero; - } else { - // registers_[31] holds the SP. - VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31); - reg = ®isters_[code % kNumberOfRegisters]; - } - - // We trace register writes as whole register values, implying that any - // unprinted bits are all zero: - // "# x{code}: 0x{-----value----}" - // "# w{code}: 0x{-value}" - // Stores trace partial register values, implying nothing about the unprinted - // bits: - // "# x{code}<63:0>: 0x{-----value----}" - // "# x{code}<31:0>: 0x{-value}" - // "# x{code}<15:0>: 0x{--}" - // "# x{code}<7:0>: 0x{}" - - bool is_partial = (format & kPrintRegPartial) != 0; - unsigned print_reg_size = GetPrintRegSizeInBits(format); - std::stringstream name; - if (is_partial) { - name << XRegNameForCode(code) << GetPartialRegSuffix(format); - } else { - // Notify the register that it has been logged, but only if we're printing - // all of it. 
- reg->NotifyRegisterLogged(); - switch (print_reg_size) { - case kWRegSize: - name << WRegNameForCode(code); +// Print each of the specified lanes of a register as a float or double value. +// +// The `lane_count` and `lslane` arguments can be used to limit the lanes that +// are printed. These arguments are intended for use in cases where register +// hasn't actually been updated (such as in PrintVWrite). +// +// No newline is printed. This allows the caller to print more details (such as +// a memory access annotation). +void Simulator::PrintVRegisterFPHelper(unsigned code, + unsigned lane_size_in_bytes, + int lane_count, + int rightmost_lane) { + VIXL_ASSERT((lane_size_in_bytes == kHRegSizeInBytes) || + (lane_size_in_bytes == kSRegSizeInBytes) || + (lane_size_in_bytes == kDRegSizeInBytes)); + + unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes); + VIXL_ASSERT(msb <= kQRegSizeInBytes); + + // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register + // name is used: + // " (h{code}: {value})" + // " (s{code}: {value})" + // " (d{code}: {value})" + // For vector types, "..." is used to represent one or more omitted lanes. + // " (..., {value}, {value}, ...)" + if (lane_size_in_bytes == kHRegSizeInBytes) { + // TODO: Trace tests will fail until we regenerate them. 
+ return; + } + if ((lane_count == 1) && (rightmost_lane == 0)) { + const char* name; + switch (lane_size_in_bytes) { + case kHRegSizeInBytes: + name = HRegNameForCode(code); break; - case kXRegSize: - name << XRegNameForCode(code); + case kSRegSizeInBytes: + name = SRegNameForCode(code); + break; + case kDRegSizeInBytes: + name = DRegNameForCode(code); break; default: + name = NULL; VIXL_UNREACHABLE(); - return; + } + fprintf(stream_, " (%s%s: ", clr_vreg_name, name); + } else { + if (msb < (kQRegSizeInBytes - 1)) { + fprintf(stream_, " (..., "); + } else { + fprintf(stream_, " ("); } } - fprintf(stream_, - "# %s%*s: %s", - clr_reg_name, - kPrintRegisterNameFieldWidth, - name.str().c_str(), - clr_reg_value); - PrintRegisterValue(*reg, format); - fprintf(stream_, "%s%s", clr_normal, suffix); -} - -void Simulator::PrintVRegister(int code, - PrintRegisterFormat format, - const char* suffix) { - VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters); - VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) || - ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) || - ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector)); - - // We trace register writes as whole register values, implying that any - // unprinted bits are all zero: - // "# v{code}: 0x{-------------value------------}" - // "# d{code}: 0x{-----value----}" - // "# s{code}: 0x{-value}" - // "# h{code}: 0x{--}" - // "# b{code}: 0x{}" - // Stores trace partial register values, implying nothing about the unprinted - // bits: - // "# v{code}<127:0>: 0x{-------------value------------}" - // "# v{code}<63:0>: 0x{-----value----}" - // "# v{code}<31:0>: 0x{-value}" - // "# v{code}<15:0>: 0x{--}" - // "# v{code}<7:0>: 0x{}" - - bool is_partial = ((format & kPrintRegPartial) != 0); - std::stringstream name; - unsigned print_reg_size = GetPrintRegSizeInBits(format); - if (is_partial) { - name << VRegNameForCode(code) << GetPartialRegSuffix(format); - } else { - // Notify the register that 
it has been logged, but only if we're printing - // all of it. - vregisters_[code].NotifyRegisterLogged(); - switch (print_reg_size) { - case kBRegSize: - name << BRegNameForCode(code); + // Print the list of values. + const char* separator = ""; + int leftmost_lane = rightmost_lane + lane_count - 1; + for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) { + double value; + switch (lane_size_in_bytes) { + case kHRegSizeInBytes: + value = ReadVRegister(code).GetLane<uint16_t>(lane); break; - case kHRegSize: - name << HRegNameForCode(code); + case kSRegSizeInBytes: + value = ReadVRegister(code).GetLane<float>(lane); break; - case kSRegSize: - name << SRegNameForCode(code); - break; - case kDRegSize: - name << DRegNameForCode(code); - break; - case kQRegSize: - name << VRegNameForCode(code); + case kDRegSizeInBytes: + value = ReadVRegister(code).GetLane<double>(lane); break; default: + value = 0.0; VIXL_UNREACHABLE(); - return; } - } - - fprintf(stream_, - "# %s%*s: %s", - clr_vreg_name, - kPrintRegisterNameFieldWidth, - name.str().c_str(), - clr_vreg_value); - PrintRegisterValue(vregisters_[code], format); - fprintf(stream_, "%s", clr_normal); - if ((format & kPrintRegAsFP) != 0) { - PrintRegisterValueFPAnnotations(vregisters_[code], format); - } - fprintf(stream_, "%s", suffix); -} - -void Simulator::PrintVRegistersForStructuredAccess(int rt_code, - int reg_count, - uint16_t focus_mask, - PrintRegisterFormat format) { - bool print_fp = (format & kPrintRegAsFP) != 0; - // Suppress FP formatting, so we can specify the lanes we're interested in. - PrintRegisterFormat format_no_fp = - static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP); - - for (int r = 0; r < reg_count; r++) { - int code = (rt_code + r) % kNumberOfVRegisters; - PrintVRegister(code, format_no_fp, ""); - if (print_fp) { - PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format); + if (IsNaN(value)) { + // The output for NaNs is implementation defined. 
Always print `nan`, so + // that traces are coherent across different implementations. + fprintf(stream_, "%s%snan%s", separator, clr_vreg_value, clr_normal); + } else { + fprintf(stream_, + "%s%s%#g%s", + separator, + clr_vreg_value, + value, + clr_normal); } - fprintf(stream_, "\n"); + separator = ", "; } -} - -void Simulator::PrintZRegistersForStructuredAccess(int rt_code, - int q_index, - int reg_count, - uint16_t focus_mask, - PrintRegisterFormat format) { - bool print_fp = (format & kPrintRegAsFP) != 0; - // Suppress FP formatting, so we can specify the lanes we're interested in. - PrintRegisterFormat format_no_fp = - static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP); - - PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format); - const unsigned size = kQRegSizeInBytes; - unsigned byte_index = q_index * size; - const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index; - VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes()); - - for (int r = 0; r < reg_count; r++) { - int code = (rt_code + r) % kNumberOfZRegisters; - PrintPartialZRegister(code, q_index, format_no_fp, ""); - if (print_fp) { - PrintRegisterValueFPAnnotations(value, focus_mask, format_q); - } - fprintf(stream_, "\n"); + if (rightmost_lane > 0) { + fprintf(stream_, ", ..."); } + fprintf(stream_, ")"); } -void Simulator::PrintZRegister(int code, PrintRegisterFormat format) { - // We're going to print the register in parts, so force a partial format. 
- format = GetPrintRegPartial(format); - VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); - int vl = GetVectorLengthInBits(); - VIXL_ASSERT((vl % kQRegSize) == 0); - for (unsigned i = 0; i < (vl / kQRegSize); i++) { - PrintPartialZRegister(code, i, format); - } + +void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) { vregisters_[code].NotifyRegisterLogged(); -} -void Simulator::PrintPRegister(int code, PrintRegisterFormat format) { - // We're going to print the register in parts, so force a partial format. - format = GetPrintRegPartial(format); - VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); - int vl = GetVectorLengthInBits(); - VIXL_ASSERT((vl % kQRegSize) == 0); - for (unsigned i = 0; i < (vl / kQRegSize); i++) { - PrintPartialPRegister(code, i, format); - } - pregisters_[code].NotifyRegisterLogged(); -} + int lane_size_log2 = format & kPrintRegLaneSizeMask; -void Simulator::PrintFFR(PrintRegisterFormat format) { - // We're going to print the register in parts, so force a partial format. - format = GetPrintRegPartial(format); - VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); - int vl = GetVectorLengthInBits(); - VIXL_ASSERT((vl % kQRegSize) == 0); - SimPRegister& ffr = ReadFFR(); - for (unsigned i = 0; i < (vl / kQRegSize); i++) { - PrintPartialPRegister("FFR", ffr, i, format); + int reg_size_log2; + if (format & kPrintRegAsQVector) { + reg_size_log2 = kQRegSizeInBytesLog2; + } else if (format & kPrintRegAsDVector) { + reg_size_log2 = kDRegSizeInBytesLog2; + } else { + // Scalar types. 
+ reg_size_log2 = lane_size_log2; } - ffr.NotifyRegisterLogged(); -} - -void Simulator::PrintPartialZRegister(int code, - int q_index, - PrintRegisterFormat format, - const char* suffix) { - VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters); - VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); - VIXL_ASSERT((format & kPrintRegPartial) != 0); - VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits()); - // We _only_ trace partial Z register values in Q-sized chunks, because - // they're often too large to reasonably fit on a single line. Each line - // implies nothing about the unprinted bits. - // "# z{code}<127:0>: 0x{-------------value------------}" + int lane_count = 1 << (reg_size_log2 - lane_size_log2); + int lane_size = 1 << lane_size_log2; - format = GetPrintRegAsQChunkOfSVE(format); + // The template for vector types: + // "# v{code}: 0x{rawbits} (..., {value}, ...)". + // The template for scalar types: + // "# v{code}: 0x{rawbits} ({reg}:{value})". + // The values in parentheses after the bit representations are floating-point + // interpretations. They are displayed only if the kPrintVRegAsFP bit is set. 
- const unsigned size = kQRegSizeInBytes; - unsigned byte_index = q_index * size; - const uint8_t* value = vregisters_[code].GetBytes() + byte_index; - VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes()); - - int lsb = q_index * kQRegSize; - int msb = lsb + kQRegSize - 1; - std::stringstream name; - name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>'; - - fprintf(stream_, - "# %s%*s: %s", - clr_vreg_name, - kPrintRegisterNameFieldWidth, - name.str().c_str(), - clr_vreg_value); - PrintRegisterValue(value, size, format); - fprintf(stream_, "%s", clr_normal); - if ((format & kPrintRegAsFP) != 0) { - PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format); + PrintVRegisterRawHelper(code); + if (format & kPrintRegAsFP) { + PrintVRegisterFPHelper(code, lane_size, lane_count); } - fprintf(stream_, "%s", suffix); -} - -void Simulator::PrintPartialPRegister(const char* name, - const SimPRegister& reg, - int q_index, - PrintRegisterFormat format, - const char* suffix) { - VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); - VIXL_ASSERT((format & kPrintRegPartial) != 0); - VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits()); - // We don't currently use the format for anything here. - USE(format); - - // We _only_ trace partial P register values, because they're often too large - // to reasonably fit on a single line. Each line implies nothing about the - // unprinted bits. - // - // We print values in binary, with spaces between each bit, in order for the - // bits to align with the Z register bytes that they predicate. 
- // "# {name}<15:0>: 0b{-------------value------------}" - - int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit; - int lsb = q_index * print_size_in_bits; - int msb = lsb + print_size_in_bits - 1; - std::stringstream prefix; - prefix << name << '<' << msb << ':' << lsb << '>'; - - fprintf(stream_, - "# %s%*s: %s0b", - clr_preg_name, - kPrintRegisterNameFieldWidth, - prefix.str().c_str(), - clr_preg_value); - for (int i = msb; i >= lsb; i--) { - fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0'); - } - fprintf(stream_, "%s%s", clr_normal, suffix); + fprintf(stream_, "\n"); } -void Simulator::PrintPartialPRegister(int code, - int q_index, - PrintRegisterFormat format, - const char* suffix) { - VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters); - PrintPartialPRegister(PRegNameForCode(code), - pregisters_[code], - q_index, - format, - suffix); -} void Simulator::PrintSystemRegister(SystemRegister id) { switch (id) { @@ -1225,348 +954,91 @@ void Simulator::PrintSystemRegister(SystemRegister id) { } } -uint16_t Simulator::PrintPartialAccess(uint16_t access_mask, - uint16_t future_access_mask, - int struct_element_count, - int lane_size_in_bytes, - const char* op, - uintptr_t address, - int reg_size_in_bytes) { - // We want to assume that we'll access at least one lane. - VIXL_ASSERT(access_mask != 0); - VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) || - (reg_size_in_bytes == kQRegSizeInBytes)); - bool started_annotation = false; - // Indent to match the register field, the fixed formatting, and the value - // prefix ("0x"): "# {name}: 0x" - fprintf(stream_, "# %*s ", kPrintRegisterNameFieldWidth, ""); - // First, annotate the lanes (byte by byte). - for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) { - bool access = (access_mask & (1 << lane)) != 0; - bool future = (future_access_mask & (1 << lane)) != 0; - if (started_annotation) { - // If we've started an annotation, draw a horizontal line in addition to - // any other symbols. 
- if (access) { - fprintf(stream_, "─╨"); - } else if (future) { - fprintf(stream_, "─║"); - } else { - fprintf(stream_, "──"); - } - } else { - if (access) { - started_annotation = true; - fprintf(stream_, " ╙"); - } else if (future) { - fprintf(stream_, " ║"); - } else { - fprintf(stream_, " "); - } - } - } - VIXL_ASSERT(started_annotation); - fprintf(stream_, "─ 0x"); - int lane_size_in_nibbles = lane_size_in_bytes * 2; - // Print the most-significant struct element first. - const char* sep = ""; - for (int i = struct_element_count - 1; i >= 0; i--) { - int offset = lane_size_in_bytes * i; - uint64_t nibble = Memory::Read(lane_size_in_bytes, address + offset); - fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, nibble); - sep = "'"; - } - fprintf(stream_, - " %s %s0x%016" PRIxPTR "%s\n", - op, - clr_memory_address, - address, - clr_normal); - return future_access_mask & ~access_mask; -} -void Simulator::PrintAccess(int code, - PrintRegisterFormat format, - const char* op, - uintptr_t address) { - VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - if ((format & kPrintRegPartial) == 0) { - registers_[code].NotifyRegisterLogged(); - } - // Scalar-format accesses use a simple format: - // "# {reg}: 0x{value} -> {address}" +void Simulator::PrintRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format) { + registers_[reg_code].NotifyRegisterLogged(); - // Suppress the newline, so the access annotation goes on the same line. - PrintRegister(code, format, ""); + USE(format); + + // The template is "# {reg}: 0x{value} <- {address}". 
+ PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister); fprintf(stream_, - " %s %s0x%016" PRIxPTR "%s\n", - op, + " <- %s0x%016" PRIxPTR "%s\n", clr_memory_address, address, clr_normal); } -void Simulator::PrintVAccess(int code, - PrintRegisterFormat format, - const char* op, - uintptr_t address) { - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - // Scalar-format accesses use a simple format: - // "# v{code}: 0x{value} -> {address}" +void Simulator::PrintVRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format, + unsigned lane) { + vregisters_[reg_code].NotifyRegisterLogged(); - // Suppress the newline, so the access annotation goes on the same line. - PrintVRegister(code, format, ""); + // The template is "# v{code}: 0x{rawbits} <- address". + PrintVRegisterRawHelper(reg_code); + if (format & kPrintRegAsFP) { + PrintVRegisterFPHelper(reg_code, + GetPrintRegLaneSizeInBytes(format), + GetPrintRegLaneCount(format), + lane); + } fprintf(stream_, - " %s %s0x%016" PRIxPTR "%s\n", - op, + " <- %s0x%016" PRIxPTR "%s\n", clr_memory_address, address, clr_normal); } -void Simulator::PrintVStructAccess(int rt_code, - int reg_count, - PrintRegisterFormat format, - const char* op, - uintptr_t address) { - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - - // For example: - // "# v{code}: 0x{value}" - // "# ...: 0x{value}" - // "# ║ ╙─ {struct_value} -> {lowest_address}" - // "# ╙───── {struct_value} -> {highest_address}" - - uint16_t lane_mask = GetPrintRegLaneMask(format); - PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); - - int reg_size_in_bytes = GetPrintRegSizeInBytes(format); - int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); - for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) { - uint16_t access_mask = 1 << i; - VIXL_ASSERT((lane_mask & access_mask) != 0); - lane_mask = PrintPartialAccess(access_mask, - lane_mask, - reg_count, - lane_size_in_bytes, - op, - 
address + (i * reg_count)); - } -} - -void Simulator::PrintVSingleStructAccess(int rt_code, - int reg_count, - int lane, - PrintRegisterFormat format, - const char* op, - uintptr_t address) { - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - - // For example: - // "# v{code}: 0x{value}" - // "# ...: 0x{value}" - // "# ╙───── {struct_value} -> {address}" - - int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); - uint16_t lane_mask = 1 << (lane * lane_size_in_bytes); - PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); - PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address); -} - -void Simulator::PrintVReplicatingStructAccess(int rt_code, - int reg_count, - PrintRegisterFormat format, - const char* op, - uintptr_t address) { - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - - // For example: - // "# v{code}: 0x{value}" - // "# ...: 0x{value}" - // "# ╙─╨─╨─╨─ {struct_value} -> {address}" - - int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); - uint16_t lane_mask = GetPrintRegLaneMask(format); - PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); - PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address); -} - -void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) { - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - - // Scalar-format accesses are split into separate chunks, each of which uses a - // simple format: - // "# z{code}<127:0>: 0x{value} -> {address}" - // "# z{code}<255:128>: 0x{value} -> {address + 16}" - // "# z{code}<383:256>: 0x{value} -> {address + 32}" - // etc - - int vl = GetVectorLengthInBits(); - VIXL_ASSERT((vl % kQRegSize) == 0); - for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { - // Suppress the newline, so the access annotation goes on the same line. 
- PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, ""); - fprintf(stream_, - " %s %s0x%016" PRIxPTR "%s\n", - op, - clr_memory_address, - address, - clr_normal); - address += kQRegSizeInBytes; - } -} - -void Simulator::PrintZStructAccess(int rt_code, - int reg_count, - const LogicPRegister& pg, - PrintRegisterFormat format, - int msize_in_bytes, - const char* op, - const LogicSVEAddressVector& addr) { - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - - // For example: - // "# z{code}<255:128>: 0x{value}" - // "# ...<255:128>: 0x{value}" - // "# ║ ╙─ {struct_value} -> {first_address}" - // "# ╙───── {struct_value} -> {last_address}" - - // We're going to print the register in parts, so force a partial format. - bool skip_inactive_chunks = (format & kPrintRegPartial) != 0; - format = GetPrintRegPartial(format); - - int esize_in_bytes = GetPrintRegLaneSizeInBytes(format); - int vl = GetVectorLengthInBits(); - VIXL_ASSERT((vl % kQRegSize) == 0); - int lanes_per_q = kQRegSizeInBytes / esize_in_bytes; - for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { - uint16_t pred = - pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format); - if ((pred == 0) && skip_inactive_chunks) continue; - - PrintZRegistersForStructuredAccess(rt_code, - q_index, - reg_count, - pred, - format); - if (pred == 0) { - // This register chunk has no active lanes. The loop below would print - // nothing, so leave a blank line to keep structures grouped together. - fprintf(stream_, "#\n"); - continue; - } - for (int i = 0; i < lanes_per_q; i++) { - uint16_t access = 1 << (i * esize_in_bytes); - int lane = (q_index * lanes_per_q) + i; - // Skip inactive lanes. - if ((pred & access) == 0) continue; - pred = PrintPartialAccess(access, - pred, - reg_count, - msize_in_bytes, - op, - addr.GetStructAddress(lane)); - } - } - - // We print the whole register, even for stores. 
- for (int i = 0; i < reg_count; i++) { - vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged(); - } -} - -void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) { - VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); - - // Scalar-format accesses are split into separate chunks, each of which uses a - // simple format: - // "# p{code}<15:0>: 0b{value} -> {address}" - // "# p{code}<31:16>: 0b{value} -> {address + 2}" - // "# p{code}<47:32>: 0b{value} -> {address + 4}" - // etc - - int vl = GetVectorLengthInBits(); - VIXL_ASSERT((vl % kQRegSize) == 0); - for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { - // Suppress the newline, so the access annotation goes on the same line. - PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, ""); - fprintf(stream_, - " %s %s0x%016" PRIxPTR "%s\n", - op, - clr_memory_address, - address, - clr_normal); - address += kQRegSizeInBytes; - } -} -void Simulator::PrintRead(int rt_code, - PrintRegisterFormat format, - uintptr_t address) { +void Simulator::PrintWrite(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format) { VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); - registers_[rt_code].NotifyRegisterLogged(); - PrintAccess(rt_code, format, "<-", address); -} - -void Simulator::PrintExtendingRead(int rt_code, - PrintRegisterFormat format, - int access_size_in_bytes, - uintptr_t address) { - int reg_size_in_bytes = GetPrintRegSizeInBytes(format); - if (access_size_in_bytes == reg_size_in_bytes) { - // There is no extension here, so print a simple load. - PrintRead(rt_code, format, address); - return; - } - VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes); - // For sign- and zero-extension, make it clear that the resulting register - // value is different from what is loaded from memory. 
- VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); - registers_[rt_code].NotifyRegisterLogged(); - PrintRegister(rt_code, format); - PrintPartialAccess(1, - 0, - 1, - access_size_in_bytes, - "<-", - address, - kXRegSizeInBytes); -} - -void Simulator::PrintVRead(int rt_code, - PrintRegisterFormat format, - uintptr_t address) { - VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); - vregisters_[rt_code].NotifyRegisterLogged(); - PrintVAccess(rt_code, format, "<-", address); + // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy + // and readable, the value is aligned with the values in the register trace. + PrintRegisterRawHelper(reg_code, + Reg31IsZeroRegister, + GetPrintRegSizeInBytes(format)); + fprintf(stream_, + " -> %s0x%016" PRIxPTR "%s\n", + clr_memory_address, + address, + clr_normal); } -void Simulator::PrintWrite(int rt_code, - PrintRegisterFormat format, - uintptr_t address) { - // Because this trace doesn't represent a change to the source register's - // value, only print the relevant part of the value. - format = GetPrintRegPartial(format); - VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); - registers_[rt_code].NotifyRegisterLogged(); - PrintAccess(rt_code, format, "->", address); -} -void Simulator::PrintVWrite(int rt_code, +void Simulator::PrintVWrite(uintptr_t address, + unsigned reg_code, PrintRegisterFormat format, - uintptr_t address) { + unsigned lane) { + // The templates: + // "# v{code}: 0x{rawbits} -> {address}" + // "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}". + // "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}" // Because this trace doesn't represent a change to the source register's - // value, only print the relevant part of the value. - format = GetPrintRegPartial(format); - // It only makes sense to write scalar values here. Vectors are handled by - // PrintVStructAccess. 
- VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); - PrintVAccess(rt_code, format, "->", address); + // value, only the relevant part of the value is printed. To keep the trace + // tidy and readable, the raw value is aligned with the other values in the + // register trace. + int lane_count = GetPrintRegLaneCount(format); + int lane_size = GetPrintRegLaneSizeInBytes(format); + int reg_size = GetPrintRegSizeInBytes(format); + PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane); + if (format & kPrintRegAsFP) { + PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane); + } + fprintf(stream_, + " -> %s0x%016" PRIxPTR "%s\n", + clr_memory_address, + address, + clr_normal); } + void Simulator::PrintTakenBranch(const Instruction* target) { fprintf(stream_, "# %sBranch%s to 0x%016" PRIx64 ".\n", @@ -1575,6 +1047,7 @@ void Simulator::PrintTakenBranch(const Instruction* target) { reinterpret_cast<uint64_t>(target)); } + // Visitors--------------------------------------------------------------------- @@ -1816,7 +1289,7 @@ void Simulator::VisitAddSubShifted(const Instruction* instr) { void Simulator::VisitAddSubImmediate(const Instruction* instr) { int64_t op2 = instr->GetImmAddSub() - << ((instr->GetImmAddSubShift() == 1) ? 12 : 0); + << ((instr->GetShiftAddSub() == 1) ? 12 : 0); AddSubHelper(instr, op2); } @@ -2016,7 +1489,7 @@ void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) { // Approximate load-acquire by issuing a full barrier after the load. 
__sync_synchronize(); - LogRead(rt, GetPrintRegisterFormat(element_size), address); + LogRead(address, rt, GetPrintRegisterFormat(element_size)); } @@ -2043,7 +1516,7 @@ void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) { Memory::Write<T>(address, ReadRegister<T>(rt)); - LogWrite(rt, GetPrintRegisterFormat(element_size), address); + LogWrite(address, rt, GetPrintRegisterFormat(element_size)); } @@ -2130,7 +1603,7 @@ void Simulator::VisitLoadStorePAC(const Instruction* instr) { WriteXRegister(dst, Memory::Read<uint64_t>(addr_ptr), NoRegLog); unsigned access_size = 1 << 3; - LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr); + LogRead(addr_ptr, dst, GetPrintRegisterFormatForSize(access_size)); } @@ -2151,65 +1624,49 @@ void Simulator::LoadStoreHelper(const Instruction* instr, unsigned srcdst = instr->GetRt(); uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode); - bool rt_is_vreg = false; - int extend_to_size = 0; LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask)); switch (op) { case LDRB_w: WriteWRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog); - extend_to_size = kWRegSizeInBytes; break; case LDRH_w: WriteWRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog); - extend_to_size = kWRegSizeInBytes; break; case LDR_w: WriteWRegister(srcdst, Memory::Read<uint32_t>(address), NoRegLog); - extend_to_size = kWRegSizeInBytes; break; case LDR_x: WriteXRegister(srcdst, Memory::Read<uint64_t>(address), NoRegLog); - extend_to_size = kXRegSizeInBytes; break; case LDRSB_w: WriteWRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog); - extend_to_size = kWRegSizeInBytes; break; case LDRSH_w: WriteWRegister(srcdst, Memory::Read<int16_t>(address), NoRegLog); - extend_to_size = kWRegSizeInBytes; break; case LDRSB_x: WriteXRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog); - extend_to_size = kXRegSizeInBytes; break; case LDRSH_x: WriteXRegister(srcdst, 
Memory::Read<int16_t>(address), NoRegLog); - extend_to_size = kXRegSizeInBytes; break; case LDRSW_x: WriteXRegister(srcdst, Memory::Read<int32_t>(address), NoRegLog); - extend_to_size = kXRegSizeInBytes; break; case LDR_b: WriteBRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog); - rt_is_vreg = true; break; case LDR_h: WriteHRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog); - rt_is_vreg = true; break; case LDR_s: WriteSRegister(srcdst, Memory::Read<float>(address), NoRegLog); - rt_is_vreg = true; break; case LDR_d: WriteDRegister(srcdst, Memory::Read<double>(address), NoRegLog); - rt_is_vreg = true; break; case LDR_q: WriteQRegister(srcdst, Memory::Read<qreg_t>(address), NoRegLog); - rt_is_vreg = true; break; case STRB_w: @@ -2226,23 +1683,18 @@ void Simulator::LoadStoreHelper(const Instruction* instr, break; case STR_b: Memory::Write<uint8_t>(address, ReadBRegister(srcdst)); - rt_is_vreg = true; break; case STR_h: Memory::Write<uint16_t>(address, ReadHRegisterBits(srcdst)); - rt_is_vreg = true; break; case STR_s: Memory::Write<float>(address, ReadSRegister(srcdst)); - rt_is_vreg = true; break; case STR_d: Memory::Write<double>(address, ReadDRegister(srcdst)); - rt_is_vreg = true; break; case STR_q: Memory::Write<qreg_t>(address, ReadQRegister(srcdst)); - rt_is_vreg = true; break; // Ignore prfm hint instructions. @@ -2253,25 +1705,22 @@ void Simulator::LoadStoreHelper(const Instruction* instr, VIXL_UNIMPLEMENTED(); } - // Print a detailed trace (including the memory address). - bool extend = (extend_to_size != 0); unsigned access_size = 1 << instr->GetSizeLS(); - unsigned result_size = extend ? extend_to_size : access_size; - PrintRegisterFormat print_format = - rt_is_vreg ? 
GetPrintRegisterFormatForSizeTryFP(result_size) - : GetPrintRegisterFormatForSize(result_size); - if (instr->IsLoad()) { - if (rt_is_vreg) { - LogVRead(srcdst, print_format, address); + if ((op == LDR_s) || (op == LDR_d)) { + LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); + } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) { + LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); } else { - LogExtendingRead(srcdst, print_format, access_size, address); + LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); } } else if (instr->IsStore()) { - if (rt_is_vreg) { - LogVWrite(srcdst, print_format, address); + if ((op == STR_s) || (op == STR_d)) { + LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); + } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) { + LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); } else { - LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address); + LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); } } else { VIXL_ASSERT(op == PRFM); @@ -2316,8 +1765,6 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, // 'rt' and 'rt2' can only be aliased for stores. VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2)); - bool rt_is_vreg = false; - bool sign_extend = false; switch (op) { // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We // will print a more detailed log. 
@@ -2329,7 +1776,6 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case LDP_s: { WriteSRegister(rt, Memory::Read<float>(address), NoRegLog); WriteSRegister(rt2, Memory::Read<float>(address2), NoRegLog); - rt_is_vreg = true; break; } case LDP_x: { @@ -2340,19 +1786,16 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case LDP_d: { WriteDRegister(rt, Memory::Read<double>(address), NoRegLog); WriteDRegister(rt2, Memory::Read<double>(address2), NoRegLog); - rt_is_vreg = true; break; } case LDP_q: { WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog); WriteQRegister(rt2, Memory::Read<qreg_t>(address2), NoRegLog); - rt_is_vreg = true; break; } case LDPSW_x: { WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog); WriteXRegister(rt2, Memory::Read<int32_t>(address2), NoRegLog); - sign_extend = true; break; } case STP_w: { @@ -2363,7 +1806,6 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case STP_s: { Memory::Write<float>(address, ReadSRegister(rt)); Memory::Write<float>(address2, ReadSRegister(rt2)); - rt_is_vreg = true; break; } case STP_x: { @@ -2374,43 +1816,40 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case STP_d: { Memory::Write<double>(address, ReadDRegister(rt)); Memory::Write<double>(address2, ReadDRegister(rt2)); - rt_is_vreg = true; break; } case STP_q: { Memory::Write<qreg_t>(address, ReadQRegister(rt)); Memory::Write<qreg_t>(address2, ReadQRegister(rt2)); - rt_is_vreg = true; break; } default: VIXL_UNREACHABLE(); } - // Print a detailed trace (including the memory address). - unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size; - PrintRegisterFormat print_format = - rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size) - : GetPrintRegisterFormatForSize(result_size); - + // Print a detailed trace (including the memory address) instead of the basic + // register:value trace generated by set_*reg(). 
if (instr->IsLoad()) { - if (rt_is_vreg) { - LogVRead(rt, print_format, address); - LogVRead(rt2, print_format, address2); - } else if (sign_extend) { - LogExtendingRead(rt, print_format, element_size, address); - LogExtendingRead(rt2, print_format, element_size, address2); + if ((op == LDP_s) || (op == LDP_d)) { + LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); + LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); + } else if (op == LDP_q) { + LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); } else { - LogRead(rt, print_format, address); - LogRead(rt2, print_format, address2); + LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); } } else { - if (rt_is_vreg) { - LogVWrite(rt, print_format, address); - LogVWrite(rt2, print_format, address2); + if ((op == STP_s) || (op == STP_d)) { + LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); + LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); + } else if (op == STP_q) { + LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); } else { - LogWrite(rt, print_format, address); - LogWrite(rt2, print_format, address2); + LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); } } @@ -2451,10 +1890,10 @@ void Simulator::CompareAndSwapHelper(const Instruction* instr) { __sync_synchronize(); } Memory::Write<T>(address, newvalue); - LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address); + LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); } - WriteRegister<T>(rs, data, NoRegLog); - LogRead(rs, GetPrintRegisterFormatForSize(element_size), address); + WriteRegister<T>(rs, data); + 
LogRead(address, rs, GetPrintRegisterFormatForSize(element_size)); } @@ -2465,7 +1904,7 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { unsigned rt = instr->GetRt(); unsigned rn = instr->GetRn(); - VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0)); + VIXL_ASSERT((rs % 2 == 0) && (rs % 2 == 0)); unsigned element_size = sizeof(T); uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer); @@ -2486,8 +1925,8 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { // associated with that location, even if the compare subsequently fails. local_monitor_.Clear(); - T data_low = Memory::Read<T>(address); - T data_high = Memory::Read<T>(address2); + T data_high = Memory::Read<T>(address); + T data_low = Memory::Read<T>(address2); if (is_acquire) { // Approximate load-acquire by issuing a full barrier after the load. @@ -2502,83 +1941,23 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { __sync_synchronize(); } - Memory::Write<T>(address, newvalue_low); - Memory::Write<T>(address2, newvalue_high); + Memory::Write<T>(address, newvalue_high); + Memory::Write<T>(address2, newvalue_low); } - WriteRegister<T>(rs + 1, data_high, NoRegLog); - WriteRegister<T>(rs, data_low, NoRegLog); + WriteRegister<T>(rs + 1, data_high); + WriteRegister<T>(rs, data_low); - PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); - LogRead(rs, format, address); - LogRead(rs + 1, format, address2); + LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size)); + LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size)); if (same) { - LogWrite(rt, format, address); - LogWrite(rt + 1, format, address2); - } -} - -bool Simulator::CanReadMemory(uintptr_t address, size_t size) { - // To simulate fault-tolerant loads, we need to know what host addresses we - // can access without generating a real fault. One way to do that is to - // attempt to `write()` the memory to a dummy pipe[1]. 
This is more portable - // and less intrusive than using (global) signal handlers. - // - // [1]: https://stackoverflow.com/questions/7134590 - - size_t written = 0; - bool can_read = true; - // `write` will normally return after one invocation, but it is allowed to - // handle only part of the operation, so wrap it in a loop. - while (can_read && (written < size)) { - ssize_t result = write(dummy_pipe_fd_[1], - reinterpret_cast<void*>(address + written), - size - written); - if (result > 0) { - written += result; - } else { - switch (result) { - case -EPERM: - case -EFAULT: - // The address range is not accessible. - // `write` is supposed to return -EFAULT in this case, but in practice - // it seems to return -EPERM, so we accept that too. - can_read = false; - break; - case -EINTR: - // The call was interrupted by a signal. Just try again. - break; - default: - // Any other error is fatal. - VIXL_ABORT(); - } - } - } - // Drain the read side of the pipe. If we don't do this, we'll leak memory as - // the dummy data is buffered. As before, we expect to drain the whole write - // in one invocation, but cannot guarantee that, so we wrap it in a loop. This - // function is primarily intended to implement SVE fault-tolerant loads, so - // the maximum Z register size is a good default buffer size. - char buffer[kZRegMaxSizeInBytes]; - while (written > 0) { - ssize_t result = read(dummy_pipe_fd_[0], - reinterpret_cast<void*>(buffer), - sizeof(buffer)); - // `read` blocks, and returns 0 only at EOF. We should not hit EOF until - // we've read everything that was written, so treat 0 as an error. - if (result > 0) { - VIXL_ASSERT(static_cast<size_t>(result) <= written); - written -= result; - } else { - // For -EINTR, just try again. We can't handle any other error. 
- VIXL_CHECK(result == -EINTR); - } + LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size)); + LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size)); } - - return can_read; } + void Simulator::PrintExclusiveAccessWarning() { if (print_exclusive_access_warning_) { fprintf(stderr, @@ -2592,6 +1971,7 @@ void Simulator::PrintExclusiveAccessWarning() { } } + void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { LoadStoreExclusive op = static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask)); @@ -2665,35 +2045,30 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). // We will print a more detailed log. - unsigned reg_size = 0; switch (op) { case LDXRB_w: case LDAXRB_w: case LDARB_w: case LDLARB: WriteWRegister(rt, Memory::Read<uint8_t>(address), NoRegLog); - reg_size = kWRegSizeInBytes; break; case LDXRH_w: case LDAXRH_w: case LDARH_w: case LDLARH: WriteWRegister(rt, Memory::Read<uint16_t>(address), NoRegLog); - reg_size = kWRegSizeInBytes; break; case LDXR_w: case LDAXR_w: case LDAR_w: case LDLAR_w: WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog); - reg_size = kWRegSizeInBytes; break; case LDXR_x: case LDAXR_x: case LDAR_x: case LDLAR_x: WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog); - reg_size = kXRegSizeInBytes; break; case LDXP_w: case LDAXP_w: @@ -2701,7 +2076,6 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { WriteWRegister(rt2, Memory::Read<uint32_t>(address + element_size), NoRegLog); - reg_size = kWRegSizeInBytes; break; case LDXP_x: case LDAXP_x: @@ -2709,7 +2083,6 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { WriteXRegister(rt2, Memory::Read<uint64_t>(address + element_size), NoRegLog); - reg_size = kXRegSizeInBytes; break; default: VIXL_UNREACHABLE(); @@ -2720,10 +2093,11 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) 
{ __sync_synchronize(); } - PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size); - LogExtendingRead(rt, format, element_size, address); + LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); if (is_pair) { - LogExtendingRead(rt2, format, element_size, address + element_size); + LogRead(address + element_size, + rt2, + GetPrintRegisterFormatForSize(element_size)); } } else { if (is_acquire_release) { @@ -2787,11 +2161,11 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { VIXL_UNREACHABLE(); } - PrintRegisterFormat format = - GetPrintRegisterFormatForSize(element_size); - LogWrite(rt, format, address); + LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); if (is_pair) { - LogWrite(rt2, format, address + element_size); + LogWrite(address + element_size, + rt2, + GetPrintRegisterFormatForSize(element_size)); } } } @@ -2858,9 +2232,8 @@ void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) { Memory::Write<T>(address, result); WriteRegister<T>(rt, data, NoRegLog); - PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); - LogRead(rt, format, address); - LogWrite(rs, format, address); + LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size)); } template <typename T> @@ -2891,9 +2264,8 @@ void Simulator::AtomicMemorySwapHelper(const Instruction* instr) { WriteRegister<T>(rt, data); - PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); - LogRead(rt, format, address); - LogWrite(rs, format, address); + LogRead(address, rt, GetPrintRegisterFormat(element_size)); + LogWrite(address, rs, GetPrintRegisterFormat(element_size)); } template <typename T> @@ -2911,7 +2283,7 @@ void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) { // Approximate load-acquire by issuing a full barrier after the load. 
__sync_synchronize(); - LogRead(rt, GetPrintRegisterFormatForSize(element_size), address); + LogRead(address, rt, GetPrintRegisterFormat(element_size)); } #define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \ @@ -3028,27 +2400,27 @@ void Simulator::VisitLoadLiteral(const Instruction* instr) { // print a more detailed log. case LDR_w_lit: WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog); - LogRead(rt, kPrintWReg, address); + LogRead(address, rt, kPrintWReg); break; case LDR_x_lit: WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog); - LogRead(rt, kPrintXReg, address); + LogRead(address, rt, kPrintXReg); break; case LDR_s_lit: WriteSRegister(rt, Memory::Read<float>(address), NoRegLog); - LogVRead(rt, kPrintSRegFP, address); + LogVRead(address, rt, kPrintSReg); break; case LDR_d_lit: WriteDRegister(rt, Memory::Read<double>(address), NoRegLog); - LogVRead(rt, kPrintDRegFP, address); + LogVRead(address, rt, kPrintDReg); break; case LDR_q_lit: WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog); - LogVRead(rt, kPrintReg1Q, address); + LogVRead(address, rt, kPrintReg1Q); break; case LDRSW_x_lit: WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog); - LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address); + LogRead(address, rt, kPrintWReg); break; // Ignore prfm hint instructions. @@ -3423,6 +2795,40 @@ void Simulator::VisitDataProcessing2Source(const Instruction* instr) { } +// The algorithm used is adapted from the one described in section 8.2 of +// Hacker's Delight, by Henry S. Warren, Jr. +template <typename T> +static int64_t MultiplyHigh(T u, T v) { + uint64_t u0, v0, w0, u1, v1, w1, w2, t; + uint64_t sign_mask = UINT64_C(0x8000000000000000); + uint64_t sign_ext = 0; + if (std::numeric_limits<T>::is_signed) { + sign_ext = UINT64_C(0xffffffff00000000); + } + + VIXL_ASSERT(sizeof(u) == sizeof(uint64_t)); + VIXL_ASSERT(sizeof(u) == sizeof(u0)); + + u0 = u & 0xffffffff; + u1 = u >> 32 | (((u & sign_mask) != 0) ? 
sign_ext : 0); + v0 = v & 0xffffffff; + v1 = v >> 32 | (((v & sign_mask) != 0) ? sign_ext : 0); + + w0 = u0 * v0; + t = u1 * v0 + (w0 >> 32); + + w1 = t & 0xffffffff; + w2 = t >> 32 | (((t & sign_mask) != 0) ? sign_ext : 0); + w1 = u0 * v1 + w1; + w1 = w1 >> 32 | (((w1 & sign_mask) != 0) ? sign_ext : 0); + + uint64_t value = u1 * v1 + w2 + w1; + int64_t result; + memcpy(&result, &value, sizeof(result)); + return result; +} + + void Simulator::VisitDataProcessing3Source(const Instruction* instr) { unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize; @@ -3458,13 +2864,12 @@ void Simulator::VisitDataProcessing3Source(const Instruction* instr) { result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32); break; case UMULH_x: - result = - internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()), - ReadRegister<uint64_t>(instr->GetRm())); + result = MultiplyHigh(ReadRegister<uint64_t>(instr->GetRn()), + ReadRegister<uint64_t>(instr->GetRm())); break; case SMULH_x: - result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()), - ReadXRegister(instr->GetRm())); + result = MultiplyHigh(ReadXRegister(instr->GetRn()), + ReadXRegister(instr->GetRm())); break; default: VIXL_UNIMPLEMENTED(); @@ -3531,10 +2936,9 @@ void Simulator::VisitExtract(const Instruction* instr) { unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize; uint64_t low_res = static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb; - uint64_t high_res = (lsb == 0) - ? 0 - : ReadRegister<uint64_t>(reg_size, instr->GetRn()) - << (reg_size - lsb); + uint64_t high_res = + (lsb == 0) ? 
0 : ReadRegister<uint64_t>(reg_size, instr->GetRn()) + << (reg_size - lsb); WriteRegister(reg_size, instr->GetRd(), low_res | high_res); } @@ -4544,8 +3948,8 @@ void Simulator::VisitSystem(const Instruction* instr) { break; case RNDR: case RNDRRS: { - uint64_t high = jrand48(rand_state_); - uint64_t low = jrand48(rand_state_); + uint64_t high = jrand48(rndr_state_); + uint64_t low = jrand48(rndr_state_); uint64_t rand_num = (high << 32) | (low & 0xffffffff); WriteXRegister(instr->GetRt(), rand_num); // Simulate successful random number generation. @@ -5126,10 +4530,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) { fminnm(vf, rd, rn, rm); break; case NEON_FMLA: - fmla(vf, rd, rd, rn, rm); + fmla(vf, rd, rn, rm); break; case NEON_FMLS: - fmls(vf, rd, rd, rn, rm); + fmls(vf, rd, rn, rm); break; case NEON_FMULX: fmulx(vf, rd, rn, rm); @@ -5220,10 +4624,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) { cmptst(vf, rd, rn, rm); break; case NEON_MLS: - mls(vf, rd, rd, rn, rm); + mls(vf, rd, rn, rm); break; case NEON_MLA: - mla(vf, rd, rd, rn, rm); + mla(vf, rd, rn, rm); break; case NEON_MUL: mul(vf, rd, rn, rm); @@ -5350,11 +4754,13 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) { B(vf, rd, rn, rm); \ break; SIM_FUNC(FMAXNM, fmaxnm); + SIM_FUNC(FMLA, fmla); SIM_FUNC(FADD, fadd); SIM_FUNC(FMULX, fmulx); SIM_FUNC(FMAX, fmax); SIM_FUNC(FRECPS, frecps); SIM_FUNC(FMINNM, fminnm); + SIM_FUNC(FMLS, fmls); SIM_FUNC(FSUB, fsub); SIM_FUNC(FMIN, fmin); SIM_FUNC(FRSQRTS, frsqrts); @@ -5367,12 +4773,6 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) { SIM_FUNC(FABD, fabd); SIM_FUNC(FMINP, fminp); #undef SIM_FUNC - case NEON_FMLA_H: - fmla(vf, rd, rd, rn, rm); - break; - case NEON_FMLS_H: - fmls(vf, rd, rd, rn, rm); - break; case NEON_FCMEQ_H: fcmp(vf, rd, rn, rm, eq); break; @@ -5403,7 +4803,7 @@ void Simulator::VisitNEON3SameExtra(const Instruction* instr) { VectorFormat vf = nfd.GetVectorFormat(); if 
(instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) { rot = instr->GetImmRotFcmlaVec(); - fcmla(vf, rd, rn, rm, rd, rot); + fcmla(vf, rd, rn, rm, rot); } else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) { rot = instr->GetImmRotFcadd(); fcadd(vf, rd, rn, rm, rot); @@ -5947,8 +5347,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters; addr[i] = addr_base + (i * reg_size); } - int struct_parts = 1; - int reg_count = 1; + int count = 1; bool log_read = true; // Bit 23 determines whether this is an offset or post-index addressing mode. @@ -5964,17 +5363,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_LD1_4v: case NEON_LD1_4v_post: ld1(vf, ReadVRegister(reg[3]), addr[3]); - reg_count++; + count++; VIXL_FALLTHROUGH(); case NEON_LD1_3v: case NEON_LD1_3v_post: ld1(vf, ReadVRegister(reg[2]), addr[2]); - reg_count++; + count++; VIXL_FALLTHROUGH(); case NEON_LD1_2v: case NEON_LD1_2v_post: ld1(vf, ReadVRegister(reg[1]), addr[1]); - reg_count++; + count++; VIXL_FALLTHROUGH(); case NEON_LD1_1v: case NEON_LD1_1v_post: @@ -5983,17 +5382,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_ST1_4v: case NEON_ST1_4v_post: st1(vf, ReadVRegister(reg[3]), addr[3]); - reg_count++; + count++; VIXL_FALLTHROUGH(); case NEON_ST1_3v: case NEON_ST1_3v_post: st1(vf, ReadVRegister(reg[2]), addr[2]); - reg_count++; + count++; VIXL_FALLTHROUGH(); case NEON_ST1_2v: case NEON_ST1_2v_post: st1(vf, ReadVRegister(reg[1]), addr[1]); - reg_count++; + count++; VIXL_FALLTHROUGH(); case NEON_ST1_1v: case NEON_ST1_1v_post: @@ -6003,14 +5402,12 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_LD2_post: case NEON_LD2: ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]); - struct_parts = 2; - reg_count = 2; + count = 2; break; case NEON_ST2: case NEON_ST2_post: st2(vf, ReadVRegister(reg[0]), 
ReadVRegister(reg[1]), addr[0]); - struct_parts = 2; - reg_count = 2; + count = 2; log_read = false; break; case NEON_LD3_post: @@ -6020,8 +5417,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[1]), ReadVRegister(reg[2]), addr[0]); - struct_parts = 3; - reg_count = 3; + count = 3; break; case NEON_ST3: case NEON_ST3_post: @@ -6030,8 +5426,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[1]), ReadVRegister(reg[2]), addr[0]); - struct_parts = 3; - reg_count = 3; + count = 3; log_read = false; break; case NEON_ST4: @@ -6042,8 +5437,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[2]), ReadVRegister(reg[3]), addr[0]); - struct_parts = 4; - reg_count = 4; + count = 4; log_read = false; break; case NEON_LD4_post: @@ -6054,31 +5448,22 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[2]), ReadVRegister(reg[3]), addr[0]); - struct_parts = 4; - reg_count = 4; + count = 4; break; default: VIXL_UNIMPLEMENTED(); } - bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites(); - if (do_trace) { - PrintRegisterFormat print_format = - GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); - const char* op; + // Explicitly log the register update whilst we have type information. + for (int i = 0; i < count; i++) { + // For de-interleaving loads, only print the base address. + int lane_size = LaneSizeInBytesFromFormat(vf); + PrintRegisterFormat format = GetPrintRegisterFormatTryFP( + GetPrintRegisterFormatForSize(reg_size, lane_size)); if (log_read) { - op = "<-"; + LogVRead(addr_base, reg[i], format); } else { - op = "->"; - // Stores don't represent a change to the source register's value, so only - // print the relevant part of the value. 
- print_format = GetPrintRegPartial(print_format); - } - - VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1)); - for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) { - uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf)); - PrintVStructAccess(reg[s], struct_parts, print_format, op, address); + LogVWrite(addr_base, reg[i], format); } } @@ -6086,7 +5471,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, int rm = instr->GetRm(); // The immediate post index addressing mode is indicated by rm = 31. // The immediate is implied by the number of vector registers used. - addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count) + addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count : ReadXRegister(rm); WriteXRegister(instr->GetRn(), addr_base); } else { @@ -6122,8 +5507,6 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, // and PostIndex addressing. bool do_load = false; - bool replicating = false; - NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); VectorFormat vf_t = nfd.GetVectorFormat(); @@ -6198,67 +5581,99 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, } case NEON_LD1R: - case NEON_LD1R_post: + case NEON_LD1R_post: { + vf = vf_t; + ld1r(vf, ReadVRegister(rt), addr); + do_load = true; + break; + } + case NEON_LD2R: - case NEON_LD2R_post: + case NEON_LD2R_post: { + vf = vf_t; + int rt2 = (rt + 1) % kNumberOfVRegisters; + ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr); + do_load = true; + break; + } + case NEON_LD3R: - case NEON_LD3R_post: - case NEON_LD4R: - case NEON_LD4R_post: + case NEON_LD3R_post: { vf = vf_t; + int rt2 = (rt + 1) % kNumberOfVRegisters; + int rt3 = (rt2 + 1) % kNumberOfVRegisters; + ld3r(vf, ReadVRegister(rt), ReadVRegister(rt2), ReadVRegister(rt3), addr); do_load = true; - replicating = true; break; + } + case NEON_LD4R: + case NEON_LD4R_post: { + vf = vf_t; + 
int rt2 = (rt + 1) % kNumberOfVRegisters; + int rt3 = (rt2 + 1) % kNumberOfVRegisters; + int rt4 = (rt3 + 1) % kNumberOfVRegisters; + ld4r(vf, + ReadVRegister(rt), + ReadVRegister(rt2), + ReadVRegister(rt3), + ReadVRegister(rt4), + addr); + do_load = true; + break; + } default: VIXL_UNIMPLEMENTED(); } + PrintRegisterFormat print_format = + GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); + // Make sure that the print_format only includes a single lane. + print_format = + static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask); + + int esize = LaneSizeInBytesFromFormat(vf); int index_shift = LaneSizeInBytesLog2FromFormat(vf); int lane = instr->GetNEONLSIndex(index_shift); - int reg_count = 0; + int scale = 0; int rt2 = (rt + 1) % kNumberOfVRegisters; int rt3 = (rt2 + 1) % kNumberOfVRegisters; int rt4 = (rt3 + 1) % kNumberOfVRegisters; switch (instr->Mask(NEONLoadStoreSingleLenMask)) { case NEONLoadStoreSingle1: - reg_count = 1; - if (replicating) { - VIXL_ASSERT(do_load); - ld1r(vf, ReadVRegister(rt), addr); - } else if (do_load) { + scale = 1; + if (do_load) { ld1(vf, ReadVRegister(rt), lane, addr); + LogVRead(addr, rt, print_format, lane); } else { st1(vf, ReadVRegister(rt), lane, addr); + LogVWrite(addr, rt, print_format, lane); } break; case NEONLoadStoreSingle2: - reg_count = 2; - if (replicating) { - VIXL_ASSERT(do_load); - ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr); - } else if (do_load) { + scale = 2; + if (do_load) { ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr); + LogVRead(addr, rt, print_format, lane); + LogVRead(addr + esize, rt2, print_format, lane); } else { st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr); + LogVWrite(addr, rt, print_format, lane); + LogVWrite(addr + esize, rt2, print_format, lane); } break; case NEONLoadStoreSingle3: - reg_count = 3; - if (replicating) { - VIXL_ASSERT(do_load); - ld3r(vf, - ReadVRegister(rt), - ReadVRegister(rt2), - ReadVRegister(rt3), - addr); - } else if 
(do_load) { + scale = 3; + if (do_load) { ld3(vf, ReadVRegister(rt), ReadVRegister(rt2), ReadVRegister(rt3), lane, addr); + LogVRead(addr, rt, print_format, lane); + LogVRead(addr + esize, rt2, print_format, lane); + LogVRead(addr + (2 * esize), rt3, print_format, lane); } else { st3(vf, ReadVRegister(rt), @@ -6266,19 +5681,14 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt3), lane, addr); + LogVWrite(addr, rt, print_format, lane); + LogVWrite(addr + esize, rt2, print_format, lane); + LogVWrite(addr + (2 * esize), rt3, print_format, lane); } break; case NEONLoadStoreSingle4: - reg_count = 4; - if (replicating) { - VIXL_ASSERT(do_load); - ld4r(vf, - ReadVRegister(rt), - ReadVRegister(rt2), - ReadVRegister(rt3), - ReadVRegister(rt4), - addr); - } else if (do_load) { + scale = 4; + if (do_load) { ld4(vf, ReadVRegister(rt), ReadVRegister(rt2), @@ -6286,6 +5696,10 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt4), lane, addr); + LogVRead(addr, rt, print_format, lane); + LogVRead(addr + esize, rt2, print_format, lane); + LogVRead(addr + (2 * esize), rt3, print_format, lane); + LogVRead(addr + (3 * esize), rt4, print_format, lane); } else { st4(vf, ReadVRegister(rt), @@ -6294,38 +5708,22 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt4), lane, addr); + LogVWrite(addr, rt, print_format, lane); + LogVWrite(addr + esize, rt2, print_format, lane); + LogVWrite(addr + (2 * esize), rt3, print_format, lane); + LogVWrite(addr + (3 * esize), rt4, print_format, lane); } break; default: VIXL_UNIMPLEMENTED(); } - // Trace registers and/or memory writes. 
- PrintRegisterFormat print_format = - GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); - if (do_load) { - if (ShouldTraceVRegs()) { - if (replicating) { - PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr); - } else { - PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr); - } - } - } else { - if (ShouldTraceWrites()) { - // Stores don't represent a change to the source register's value, so only - // print the relevant part of the value. - print_format = GetPrintRegPartial(print_format); - PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr); - } - } - if (addr_mode == PostIndex) { int rm = instr->GetRm(); int lane_size = LaneSizeInBytesFromFormat(vf); WriteXRegister(instr->GetRn(), - addr + ((rm == 31) ? (reg_count * lane_size) - : ReadXRegister(rm))); + addr + + ((rm == 31) ? (scale * lane_size) : ReadXRegister(rm))); } } @@ -7023,10 +6421,10 @@ void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) { NEONFormatDecoder nfd(instr, &map); VectorFormat vf = nfd.GetVectorFormat(); - int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh()); - int immh_immb = instr->GetImmNEONImmhImmb(); - int right_shift = (16 << highest_set_bit) - immh_immb; - int left_shift = immh_immb - (8 << highest_set_bit); + int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh()); + int immhimmb = instr->GetImmNEONImmhImmb(); + int right_shift = (16 << highestSetBit) - immhimmb; + int left_shift = immhimmb - (8 << highestSetBit); switch (instr->Mask(NEONScalarShiftImmediateMask)) { case NEON_SHL_scalar: shl(vf, rd, rn, left_shift); @@ -7131,10 +6529,10 @@ void Simulator::VisitNEONShiftImmediate(const Instruction* instr) { {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}}; VectorFormat vf_l = nfd.GetVectorFormat(&map_l); - int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh()); - int immh_immb = instr->GetImmNEONImmhImmb(); - int right_shift = (16 << 
highest_set_bit) - immh_immb; - int left_shift = immh_immb - (8 << highest_set_bit); + int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh()); + int immhimmb = instr->GetImmNEONImmhImmb(); + int right_shift = (16 << highestSetBit) - immhimmb; + int left_shift = immhimmb - (8 << highestSetBit); switch (instr->Mask(NEONShiftImmediateMask)) { case NEON_SHL: @@ -7343,4356 +6741,6 @@ void Simulator::VisitNEONPerm(const Instruction* instr) { } } -void Simulator::VisitSVEAddressGeneration(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - SimVRegister temp; - - VectorFormat vform = kFormatVnD; - mov(vform, temp, zm); - - switch (instr->Mask(SVEAddressGenerationMask)) { - case ADR_z_az_d_s32_scaled: - sxt(vform, temp, temp, kSRegSize); - break; - case ADR_z_az_d_u32_scaled: - uxt(vform, temp, temp, kSRegSize); - break; - case ADR_z_az_s_same_scaled: - vform = kFormatVnS; - break; - case ADR_z_az_d_same_scaled: - // Nothing to do. 
- break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - int shift_amount = instr->ExtractBits(11, 10); - shl(vform, temp, temp, shift_amount); - add(vform, zd, zn, temp); -} - -void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated( - const Instruction* instr) { - Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask); - switch (op) { - case AND_z_zi: - case EOR_z_zi: - case ORR_z_zi: { - int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); - uint64_t imm = instr->GetSVEImmLogical(); - // Valid immediate is a non-zero bits - VIXL_ASSERT(imm != 0); - SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>( - op), - SVEFormatFromLaneSizeInBytesLog2(lane_size), - ReadVRegister(instr->GetRd()), - imm); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) { - switch (instr->Mask(SVEBroadcastBitmaskImmMask)) { - case DUPM_z_i: { - /* DUPM uses the same lane size and immediate encoding as bitwise logical - * immediate instructions. 
*/ - int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); - uint64_t imm = instr->GetSVEImmLogical(); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); - dup_immediate(vform, ReadVRegister(instr->GetRd()), imm); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask); - - LogicalOp logical_op; - switch (op) { - case AND_z_zz: - logical_op = AND; - break; - case BIC_z_zz: - logical_op = BIC; - break; - case EOR_z_zz: - logical_op = EOR; - break; - case ORR_z_zz: - logical_op = ORR; - break; - default: - logical_op = LogicalOpMask; - VIXL_UNIMPLEMENTED(); - break; - } - // Lane size of registers is irrelevant to the bitwise operations, so perform - // the operation on D-sized lanes. 
- SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm); -} - -void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) { - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - SimVRegister scratch; - SimVRegister result; - - bool for_division = false; - Shift shift_op = NO_SHIFT; - switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) { - case ASRD_z_p_zi: - shift_op = ASR; - for_division = true; - break; - case ASR_z_p_zi: - shift_op = ASR; - break; - case LSL_z_p_zi: - shift_op = LSL; - break; - case LSR_z_p_zi: - shift_op = LSR; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true); - unsigned lane_size = shift_and_lane_size.second; - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); - int shift_dist = shift_and_lane_size.first; - - if ((shift_op == ASR) && for_division) { - asrd(vform, result, zdn, shift_dist); - } else { - if (shift_op == LSL) { - // Shift distance is computed differently for LSL. Convert the result. - shift_dist = (8 << lane_size) - shift_dist; - } - dup_immediate(vform, scratch, shift_dist); - SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false); - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEBitwiseShiftByVector_Predicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - SimVRegister result; - SimVRegister shiftand; // Vector to be shifted. - SimVRegister shiftor; // Vector shift amount. 
- - Shift shift_op = ASR; - mov(vform, shiftand, zdn); - mov(vform, shiftor, zm); - - switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { - case ASRR_z_p_zz: - mov(vform, shiftand, zm); - mov(vform, shiftor, zdn); - VIXL_FALLTHROUGH(); - case ASR_z_p_zz: - break; - case LSLR_z_p_zz: - mov(vform, shiftand, zm); - mov(vform, shiftor, zdn); - VIXL_FALLTHROUGH(); - case LSL_z_p_zz: - shift_op = LSL; - break; - case LSRR_z_p_zz: - mov(vform, shiftand, zm); - mov(vform, shiftor, zdn); - VIXL_FALLTHROUGH(); - case LSR_z_p_zz: - shift_op = LSR; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - SVEBitwiseShiftHelper(shift_op, - vform, - result, - shiftand, - shiftor, - /* is_wide_elements = */ false); - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - SimVRegister result; - Shift shift_op = ASR; - - switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { - case ASR_z_p_zw: - break; - case LSL_z_p_zw: - shift_op = LSL; - break; - case LSR_z_p_zw: - shift_op = LSR; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - SVEBitwiseShiftHelper(shift_op, - vform, - result, - zdn, - zm, - /* is_wide_elements = */ true); - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - - Shift shift_op; - switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { - case ASR_z_zi: - case ASR_z_zw: - shift_op = ASR; - break; - case LSL_z_zi: - case LSL_z_zw: - shift_op = LSL; - break; - case LSR_z_zi: - case LSR_z_zw: - shift_op = LSR; - break; - default: - shift_op = NO_SHIFT; - 
VIXL_UNIMPLEMENTED(); - break; - } - - switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { - case ASR_z_zi: - case LSL_z_zi: - case LSR_z_zi: { - SimVRegister scratch; - std::pair<int, int> shift_and_lane_size = - instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); - unsigned lane_size = shift_and_lane_size.second; - VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); - int shift_dist = shift_and_lane_size.first; - if (shift_op == LSL) { - // Shift distance is computed differently for LSL. Convert the result. - shift_dist = (8 << lane_size) - shift_dist; - } - dup_immediate(vform, scratch, shift_dist); - SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false); - break; - } - case ASR_z_zw: - case LSL_z_zw: - case LSR_z_zw: - SVEBitwiseShiftHelper(shift_op, - instr->GetSVEVectorFormat(), - zd, - zn, - ReadVRegister(instr->GetRm()), - true); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) { - // Although the instructions have a separate encoding class, the lane size is - // encoded in the same way as most other SVE instructions. - VectorFormat vform = instr->GetSVEVectorFormat(); - - int pattern = instr->GetImmSVEPredicateConstraint(); - int count = GetPredicateConstraintLaneCount(vform, pattern); - int multiplier = instr->ExtractBits(19, 16) + 1; - - switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) { - case DECB_r_rs: - case DECD_r_rs: - case DECH_r_rs: - case DECW_r_rs: - count = -count; - break; - case INCB_r_rs: - case INCD_r_rs: - case INCH_r_rs: - case INCW_r_rs: - // Nothing to do. 
- break; - default: - VIXL_UNIMPLEMENTED(); - return; - } - - WriteXRegister(instr->GetRd(), - IncDecN(ReadXRegister(instr->GetRd()), - count * multiplier, - kXRegSize)); -} - -void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { - VIXL_UNIMPLEMENTED(); - } - - int pattern = instr->GetImmSVEPredicateConstraint(); - int count = GetPredicateConstraintLaneCount(vform, pattern); - int multiplier = instr->ExtractBits(19, 16) + 1; - - switch (instr->Mask(SVEIncDecVectorByElementCountMask)) { - case DECD_z_zs: - case DECH_z_zs: - case DECW_z_zs: - count = -count; - break; - case INCD_z_zs: - case INCH_z_zs: - case INCW_z_zs: - // Nothing to do. - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister scratch; - dup_immediate(vform, - scratch, - IncDecN(0, - count * multiplier, - LaneSizeInBitsFromFormat(vform))); - add(vform, zd, zd, scratch); -} - -void Simulator::VisitSVESaturatingIncDecRegisterByElementCount( - const Instruction* instr) { - // Although the instructions have a separate encoding class, the lane size is - // encoded in the same way as most other SVE instructions. 
- VectorFormat vform = instr->GetSVEVectorFormat(); - - int pattern = instr->GetImmSVEPredicateConstraint(); - int count = GetPredicateConstraintLaneCount(vform, pattern); - int multiplier = instr->ExtractBits(19, 16) + 1; - - unsigned width = kXRegSize; - bool is_signed = false; - - switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) { - case SQDECB_r_rs_sx: - case SQDECD_r_rs_sx: - case SQDECH_r_rs_sx: - case SQDECW_r_rs_sx: - width = kWRegSize; - VIXL_FALLTHROUGH(); - case SQDECB_r_rs_x: - case SQDECD_r_rs_x: - case SQDECH_r_rs_x: - case SQDECW_r_rs_x: - is_signed = true; - count = -count; - break; - case SQINCB_r_rs_sx: - case SQINCD_r_rs_sx: - case SQINCH_r_rs_sx: - case SQINCW_r_rs_sx: - width = kWRegSize; - VIXL_FALLTHROUGH(); - case SQINCB_r_rs_x: - case SQINCD_r_rs_x: - case SQINCH_r_rs_x: - case SQINCW_r_rs_x: - is_signed = true; - break; - case UQDECB_r_rs_uw: - case UQDECD_r_rs_uw: - case UQDECH_r_rs_uw: - case UQDECW_r_rs_uw: - width = kWRegSize; - VIXL_FALLTHROUGH(); - case UQDECB_r_rs_x: - case UQDECD_r_rs_x: - case UQDECH_r_rs_x: - case UQDECW_r_rs_x: - count = -count; - break; - case UQINCB_r_rs_uw: - case UQINCD_r_rs_uw: - case UQINCH_r_rs_uw: - case UQINCW_r_rs_uw: - width = kWRegSize; - VIXL_FALLTHROUGH(); - case UQINCB_r_rs_x: - case UQINCD_r_rs_x: - case UQINCH_r_rs_x: - case UQINCW_r_rs_x: - // Nothing to do. 
- break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - WriteXRegister(instr->GetRd(), - IncDecN(ReadXRegister(instr->GetRd()), - count * multiplier, - width, - true, - is_signed)); -} - -void Simulator::VisitSVESaturatingIncDecVectorByElementCount( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { - VIXL_UNIMPLEMENTED(); - } - - int pattern = instr->GetImmSVEPredicateConstraint(); - int count = GetPredicateConstraintLaneCount(vform, pattern); - int multiplier = instr->ExtractBits(19, 16) + 1; - - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister scratch; - dup_immediate(vform, - scratch, - IncDecN(0, - count * multiplier, - LaneSizeInBitsFromFormat(vform))); - - switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) { - case SQDECD_z_zs: - case SQDECH_z_zs: - case SQDECW_z_zs: - sub(vform, zd, zd, scratch).SignedSaturate(vform); - break; - case SQINCD_z_zs: - case SQINCH_z_zs: - case SQINCW_z_zs: - add(vform, zd, zd, scratch).SignedSaturate(vform); - break; - case UQDECD_z_zs: - case UQDECH_z_zs: - case UQDECW_z_zs: - sub(vform, zd, zd, scratch).UnsignedSaturate(vform); - break; - case UQINCD_z_zs: - case UQINCH_z_zs: - case UQINCW_z_zs: - add(vform, zd, zd, scratch).UnsignedSaturate(vform); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEElementCount(const Instruction* instr) { - switch (instr->Mask(SVEElementCountMask)) { - case CNTB_r_s: - case CNTD_r_s: - case CNTH_r_s: - case CNTW_r_s: - // All handled below. - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - // Although the instructions are separated, the lane size is encoded in the - // same way as most other SVE instructions. 
- VectorFormat vform = instr->GetSVEVectorFormat(); - - int pattern = instr->GetImmSVEPredicateConstraint(); - int count = GetPredicateConstraintLaneCount(vform, pattern); - int multiplier = instr->ExtractBits(19, 16) + 1; - WriteXRegister(instr->GetRd(), count * multiplier); -} - -void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& vdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - switch (instr->Mask(SVEFPAccumulatingReductionMask)) { - case FADDA_v_p_z: - fadda(vform, vdn, pg, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - SimVRegister result; - - switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { - case FABD_z_p_zz: - fabd(vform, result, zdn, zm); - break; - case FADD_z_p_zz: - fadd(vform, result, zdn, zm); - break; - case FDIVR_z_p_zz: - fdiv(vform, result, zm, zdn); - break; - case FDIV_z_p_zz: - fdiv(vform, result, zdn, zm); - break; - case FMAXNM_z_p_zz: - fmaxnm(vform, result, zdn, zm); - break; - case FMAX_z_p_zz: - fmax(vform, result, zdn, zm); - break; - case FMINNM_z_p_zz: - fminnm(vform, result, zdn, zm); - break; - case FMIN_z_p_zz: - fmin(vform, result, zdn, zm); - break; - case FMULX_z_p_zz: - fmulx(vform, result, zdn, zm); - break; - case FMUL_z_p_zz: - fmul(vform, result, zdn, zm); - break; - case FSCALE_z_p_zz: - fscale(vform, result, zdn, zm); - break; - case FSUBR_z_p_zz: - fsub(vform, result, zm, zdn); - break; - case FSUB_z_p_zz: - fsub(vform, result, zdn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, 
pg, result); -} - -void Simulator::VisitSVEFPArithmeticWithImm_Predicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { - VIXL_UNIMPLEMENTED(); - } - - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - int i1 = instr->ExtractBit(5); - SimVRegister add_sub_imm, min_max_imm, mul_imm; - uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5); - uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0); - uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0); - dup_immediate(vform, add_sub_imm, i1 ? one : half); - dup_immediate(vform, min_max_imm, i1 ? one : 0); - dup_immediate(vform, mul_imm, i1 ? two : half); - - switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) { - case FADD_z_p_zs: - fadd(vform, result, zdn, add_sub_imm); - break; - case FMAXNM_z_p_zs: - fmaxnm(vform, result, zdn, min_max_imm); - break; - case FMAX_z_p_zs: - fmax(vform, result, zdn, min_max_imm); - break; - case FMINNM_z_p_zs: - fminnm(vform, result, zdn, min_max_imm); - break; - case FMIN_z_p_zs: - fmin(vform, result, zdn, min_max_imm); - break; - case FMUL_z_p_zs: - fmul(vform, result, zdn, mul_imm); - break; - case FSUBR_z_p_zs: - fsub(vform, result, add_sub_imm, zdn); - break; - case FSUB_z_p_zs: - fsub(vform, result, zdn, add_sub_imm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - - switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { - case FTMAD_z_zzi: - ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16)); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - 
-void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - - switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { - case FADD_z_zz: - fadd(vform, zd, zn, zm); - break; - case FMUL_z_zz: - fmul(vform, zd, zn, zm); - break; - case FRECPS_z_zz: - frecps(vform, zd, zn, zm); - break; - case FRSQRTS_z_zz: - frsqrts(vform, zd, zn, zm); - break; - case FSUB_z_zz: - fsub(vform, zd, zn, zm); - break; - case FTSMUL_z_zz: - ftsmul(vform, zd, zn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) { - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister result; - - switch (instr->Mask(SVEFPCompareVectorsMask)) { - case FACGE_p_p_zz: - fabscmp(vform, result, zn, zm, ge); - break; - case FACGT_p_p_zz: - fabscmp(vform, result, zn, zm, gt); - break; - case FCMEQ_p_p_zz: - fcmp(vform, result, zn, zm, eq); - break; - case FCMGE_p_p_zz: - fcmp(vform, result, zn, zm, ge); - break; - case FCMGT_p_p_zz: - fcmp(vform, result, zn, zm, gt); - break; - case FCMNE_p_p_zz: - fcmp(vform, result, zn, zm, ne); - break; - case FCMUO_p_p_zz: - fcmp(vform, result, zn, zm, uo); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - ExtractFromSimVRegister(vform, pd, result); - mov_zeroing(pd, pg, pd); -} - -void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) { - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = 
instr->GetSVEVectorFormat(); - SimVRegister result; - - SimVRegister zeros; - dup_immediate(kFormatVnD, zeros, 0); - - switch (instr->Mask(SVEFPCompareWithZeroMask)) { - case FCMEQ_p_p_z0: - fcmp(vform, result, zn, zeros, eq); - break; - case FCMGE_p_p_z0: - fcmp(vform, result, zn, zeros, ge); - break; - case FCMGT_p_p_z0: - fcmp(vform, result, zn, zeros, gt); - break; - case FCMLE_p_p_z0: - fcmp(vform, result, zn, zeros, le); - break; - case FCMLT_p_p_z0: - fcmp(vform, result, zn, zeros, lt); - break; - case FCMNE_p_p_z0: - fcmp(vform, result, zn, zeros, ne); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - ExtractFromSimVRegister(vform, pd, result); - mov_zeroing(pd, pg, pd); -} - -void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - - if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { - VIXL_UNIMPLEMENTED(); - } - - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - int rot = instr->ExtractBit(16); - - SimVRegister result; - - switch (instr->Mask(SVEFPComplexAdditionMask)) { - case FCADD_z_p_zz: - fcadd(vform, result, zdn, zm, rot); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - - if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { - VIXL_UNIMPLEMENTED(); - } - - SimVRegister& zda = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - int rot = instr->ExtractBits(14, 13); - - SimVRegister result; - - switch (instr->Mask(SVEFPComplexMulAddMask)) { - case FCMLA_z_p_zzz: - fcmla(vform, result, zn, zm, zda, rot); - break; - default: - VIXL_UNIMPLEMENTED(); - break; 
- } - mov_merging(vform, zda, pg, result); -} - -void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) { - SimVRegister& zda = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - int rot = instr->ExtractBits(11, 10); - unsigned zm_code = instr->GetRm(); - int index = -1; - VectorFormat vform, vform_dup; - - switch (instr->Mask(SVEFPComplexMulAddIndexMask)) { - case FCMLA_z_zzzi_h: - vform = kFormatVnH; - vform_dup = kFormatVnS; - index = zm_code >> 3; - zm_code &= 0x7; - break; - case FCMLA_z_zzzi_s: - vform = kFormatVnS; - vform_dup = kFormatVnD; - index = zm_code >> 4; - zm_code &= 0xf; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (index >= 0) { - SimVRegister temp; - dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index); - fcmla(vform, zda, zn, temp, zda, rot); - } -} - -typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src); - -void Simulator::VisitSVEFPFastReduction(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& vd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - int lane_size = LaneSizeInBitsFromFormat(vform); - - uint64_t inactive_value = 0; - FastReduceFn fn = nullptr; - - switch (instr->Mask(SVEFPFastReductionMask)) { - case FADDV_v_p_z: - fn = &Simulator::faddv; - break; - case FMAXNMV_v_p_z: - inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); - fn = &Simulator::fmaxnmv; - break; - case FMAXV_v_p_z: - inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); - fn = &Simulator::fmaxv; - break; - case FMINNMV_v_p_z: - inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); - fn = &Simulator::fminnmv; - break; - case FMINV_v_p_z: - inactive_value = FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity); - fn = &Simulator::fminv; 
- break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SimVRegister scratch; - dup_immediate(vform, scratch, inactive_value); - mov_merging(vform, scratch, pg, zn); - if (fn != nullptr) (this->*fn)(vform, vd, scratch); -} - -void Simulator::VisitSVEFPMulIndex(const Instruction* instr) { - VectorFormat vform = kFormatUndefined; - unsigned zm_code = instr->GetRm() & 0xf; - unsigned index = instr->ExtractBits(20, 19); - - switch (instr->Mask(SVEFPMulIndexMask)) { - case FMUL_z_zzi_d: - vform = kFormatVnD; - index >>= 1; // Only bit 20 is the index for D lanes. - break; - case FMUL_z_zzi_h_i3h: - index += 4; // Bit 22 (i3h) is the top bit of index. - VIXL_FALLTHROUGH(); - case FMUL_z_zzi_h: - vform = kFormatVnH; - zm_code &= 7; // Three bits used for zm. - break; - case FMUL_z_zzi_s: - vform = kFormatVnS; - zm_code &= 7; // Three bits used for zm. - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister temp; - - dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); - fmul(vform, zd, zn, temp); -} - -void Simulator::VisitSVEFPMulAdd(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - if (instr->ExtractBit(15) == 0) { - // Floating-point multiply-accumulate writing addend. 
- SimVRegister& zm = ReadVRegister(instr->GetRm()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - - switch (instr->Mask(SVEFPMulAddMask)) { - // zda = zda + zn * zm - case FMLA_z_p_zzz: - fmla(vform, result, zd, zn, zm); - break; - // zda = -zda + -zn * zm - case FNMLA_z_p_zzz: - fneg(vform, result, zd); - fmls(vform, result, result, zn, zm); - break; - // zda = zda + -zn * zm - case FMLS_z_p_zzz: - fmls(vform, result, zd, zn, zm); - break; - // zda = -zda + zn * zm - case FNMLS_z_p_zzz: - fneg(vform, result, zd); - fmla(vform, result, result, zn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - } else { - // Floating-point multiply-accumulate writing multiplicand. - SimVRegister& za = ReadVRegister(instr->GetRm()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - - switch (instr->Mask(SVEFPMulAddMask)) { - // zdn = za + zdn * zm - case FMAD_z_p_zzz: - fmla(vform, result, za, zd, zm); - break; - // zdn = -za + -zdn * zm - case FNMAD_z_p_zzz: - fneg(vform, result, za); - fmls(vform, result, result, zd, zm); - break; - // zdn = za + -zdn * zm - case FMSB_z_p_zzz: - fmls(vform, result, za, zd, zm); - break; - // zdn = -za + zdn * zm - case FNMSB_z_p_zzz: - fneg(vform, result, za); - fmla(vform, result, result, zd, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - } - - mov_merging(vform, zd, pg, result); -} - -void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) { - VectorFormat vform = kFormatUndefined; - unsigned zm_code = 0xffffffff; - unsigned index = 0xffffffff; - - switch (instr->Mask(SVEFPMulAddIndexMask)) { - case FMLA_z_zzzi_d: - case FMLS_z_zzzi_d: - vform = kFormatVnD; - zm_code = instr->GetRmLow16(); - // Only bit 20 is the index for D lanes. - index = instr->ExtractBit(20); - break; - case FMLA_z_zzzi_s: - case FMLS_z_zzzi_s: - vform = kFormatVnS; - zm_code = instr->GetRm() & 0x7; // Three bits used for zm. 
- index = instr->ExtractBits(20, 19); - break; - case FMLA_z_zzzi_h: - case FMLS_z_zzzi_h: - case FMLA_z_zzzi_h_i3h: - case FMLS_z_zzzi_h_i3h: - vform = kFormatVnH; - zm_code = instr->GetRm() & 0x7; // Three bits used for zm. - index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister temp; - - dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); - if (instr->ExtractBit(10) == 1) { - fmls(vform, zd, zd, zn, temp); - } else { - fmla(vform, zd, zd, zn, temp); - } -} - -void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - int dst_data_size; - int src_data_size; - - switch (instr->Mask(SVEFPConvertToIntMask)) { - case FCVTZS_z_p_z_d2w: - case FCVTZU_z_p_z_d2w: - dst_data_size = kSRegSize; - src_data_size = kDRegSize; - break; - case FCVTZS_z_p_z_d2x: - case FCVTZU_z_p_z_d2x: - dst_data_size = kDRegSize; - src_data_size = kDRegSize; - break; - case FCVTZS_z_p_z_fp162h: - case FCVTZU_z_p_z_fp162h: - dst_data_size = kHRegSize; - src_data_size = kHRegSize; - break; - case FCVTZS_z_p_z_fp162w: - case FCVTZU_z_p_z_fp162w: - dst_data_size = kSRegSize; - src_data_size = kHRegSize; - break; - case FCVTZS_z_p_z_fp162x: - case FCVTZU_z_p_z_fp162x: - dst_data_size = kDRegSize; - src_data_size = kHRegSize; - break; - case FCVTZS_z_p_z_s2w: - case FCVTZU_z_p_z_s2w: - dst_data_size = kSRegSize; - src_data_size = kSRegSize; - break; - case FCVTZS_z_p_z_s2x: - case FCVTZU_z_p_z_s2x: - dst_data_size = kDRegSize; - src_data_size = kSRegSize; - break; - default: - VIXL_UNIMPLEMENTED(); - dst_data_size = 0; - src_data_size = 0; - break; - } - - VectorFormat vform = - 
SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); - - if (instr->ExtractBit(16) == 0) { - fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero); - } else { - fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero); - } -} - -void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - int dst_data_size; - int src_data_size; - - switch (instr->Mask(SVEFPConvertPrecisionMask)) { - case FCVT_z_p_z_d2h: - dst_data_size = kHRegSize; - src_data_size = kDRegSize; - break; - case FCVT_z_p_z_d2s: - dst_data_size = kSRegSize; - src_data_size = kDRegSize; - break; - case FCVT_z_p_z_h2d: - dst_data_size = kDRegSize; - src_data_size = kHRegSize; - break; - case FCVT_z_p_z_h2s: - dst_data_size = kSRegSize; - src_data_size = kHRegSize; - break; - case FCVT_z_p_z_s2d: - dst_data_size = kDRegSize; - src_data_size = kSRegSize; - break; - case FCVT_z_p_z_s2h: - dst_data_size = kHRegSize; - src_data_size = kSRegSize; - break; - default: - VIXL_UNIMPLEMENTED(); - dst_data_size = 0; - src_data_size = 0; - break; - } - VectorFormat vform = - SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); - - fcvt(vform, dst_data_size, src_data_size, zd, pg, zn); -} - -void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister result; - - switch (instr->Mask(SVEFPUnaryOpMask)) { - case FRECPX_z_p_z: - frecpx(vform, result, zn); - break; - case FSQRT_z_p_z: - fsqrt(vform, result, zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zd, pg, result); -} - -void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) 
{ - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = instr->GetSVEVectorFormat(); - FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); - bool exact_exception = false; - - switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { - case FRINTA_z_p_z: - fpcr_rounding = FPTieAway; - break; - case FRINTI_z_p_z: - break; // Use FPCR rounding mode. - case FRINTM_z_p_z: - fpcr_rounding = FPNegativeInfinity; - break; - case FRINTN_z_p_z: - fpcr_rounding = FPTieEven; - break; - case FRINTP_z_p_z: - fpcr_rounding = FPPositiveInfinity; - break; - case FRINTX_z_p_z: - exact_exception = true; - break; - case FRINTZ_z_p_z: - fpcr_rounding = FPZero; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SimVRegister result; - frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger); - mov_merging(vform, zd, pg, result); -} - -void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); - int dst_data_size; - int src_data_size; - - switch (instr->Mask(SVEIntConvertToFPMask)) { - case SCVTF_z_p_z_h2fp16: - case UCVTF_z_p_z_h2fp16: - dst_data_size = kHRegSize; - src_data_size = kHRegSize; - break; - case SCVTF_z_p_z_w2d: - case UCVTF_z_p_z_w2d: - dst_data_size = kDRegSize; - src_data_size = kSRegSize; - break; - case SCVTF_z_p_z_w2fp16: - case UCVTF_z_p_z_w2fp16: - dst_data_size = kHRegSize; - src_data_size = kSRegSize; - break; - case SCVTF_z_p_z_w2s: - case UCVTF_z_p_z_w2s: - dst_data_size = kSRegSize; - src_data_size = kSRegSize; - break; - case SCVTF_z_p_z_x2d: - case UCVTF_z_p_z_x2d: - dst_data_size = kDRegSize; - src_data_size = kDRegSize; - break; - case 
SCVTF_z_p_z_x2fp16: - case UCVTF_z_p_z_x2fp16: - dst_data_size = kHRegSize; - src_data_size = kDRegSize; - break; - case SCVTF_z_p_z_x2s: - case UCVTF_z_p_z_x2s: - dst_data_size = kSRegSize; - src_data_size = kDRegSize; - break; - default: - VIXL_UNIMPLEMENTED(); - dst_data_size = 0; - src_data_size = 0; - break; - } - - VectorFormat vform = - SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); - - if (instr->ExtractBit(16) == 0) { - scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding); - } else { - ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding); - } -} - -void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); - - switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { - case FRECPE_z_z: - frecpe(vform, zd, zn, fpcr_rounding); - break; - case FRSQRTE_z_z: - frsqrte(vform, zd, zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5)); - - int count = CountActiveLanes(vform, pg); - - if (instr->ExtractBit(11) == 0) { - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - switch (instr->Mask(SVEIncDecByPredicateCountMask)) { - case DECP_z_p_z: - sub_uint(vform, zdn, zdn, count); - break; - case INCP_z_p_z: - add_uint(vform, zdn, zdn, count); - break; - case SQDECP_z_p_z: - sub_uint(vform, zdn, zdn, count).SignedSaturate(vform); - break; - case SQINCP_z_p_z: - add_uint(vform, zdn, zdn, count).SignedSaturate(vform); - break; - case UQDECP_z_p_z: - sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform); - break; - case UQINCP_z_p_z: - add_uint(vform, zdn, zdn, 
count).UnsignedSaturate(vform); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - } else { - bool is_saturating = (instr->ExtractBit(18) == 0); - bool decrement = - is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16); - bool is_signed = (instr->ExtractBit(16) == 0); - bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true; - unsigned width = sf ? kXRegSize : kWRegSize; - - switch (instr->Mask(SVEIncDecByPredicateCountMask)) { - case DECP_r_p_r: - case INCP_r_p_r: - case SQDECP_r_p_r_sx: - case SQDECP_r_p_r_x: - case SQINCP_r_p_r_sx: - case SQINCP_r_p_r_x: - case UQDECP_r_p_r_uw: - case UQDECP_r_p_r_x: - case UQINCP_r_p_r_uw: - case UQINCP_r_p_r_x: - WriteXRegister(instr->GetRd(), - IncDecN(ReadXRegister(instr->GetRd()), - decrement ? -count : count, - width, - is_saturating, - is_signed)); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - } -} - -uint64_t Simulator::IncDecN(uint64_t acc, - int64_t delta, - unsigned n, - bool is_saturating, - bool is_signed) { - VIXL_ASSERT(n <= 64); - VIXL_ASSERT(IsIntN(n, delta)); - - uint64_t sign_mask = UINT64_C(1) << (n - 1); - uint64_t mask = GetUintMask(n); - - acc &= mask; // Ignore initial accumulator high bits. - uint64_t result = (acc + delta) & mask; - - bool result_negative = ((result & sign_mask) != 0); - - if (is_saturating) { - if (is_signed) { - bool acc_negative = ((acc & sign_mask) != 0); - bool delta_negative = delta < 0; - - // If the signs of the operands are the same, but different from the - // result, there was an overflow. - if ((acc_negative == delta_negative) && - (acc_negative != result_negative)) { - if (result_negative) { - // Saturate to [..., INT<n>_MAX]. - result_negative = false; - result = mask & ~sign_mask; // E.g. 0x000000007fffffff - } else { - // Saturate to [INT<n>_MIN, ...]. - result_negative = true; - result = ~mask | sign_mask; // E.g. 0xffffffff80000000 - } - } - } else { - if ((delta < 0) && (result > acc)) { - // Saturate to [0, ...]. 
- result = 0; - } else if ((delta > 0) && (result < acc)) { - // Saturate to [..., UINT<n>_MAX]. - result = mask; - } - } - } - - // Sign-extend if necessary. - if (result_negative && is_signed) result |= ~mask; - - return result; -} - -void Simulator::VisitSVEIndexGeneration(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - switch (instr->Mask(SVEIndexGenerationMask)) { - case INDEX_z_ii: - case INDEX_z_ir: - case INDEX_z_ri: - case INDEX_z_rr: { - uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn()) - : instr->ExtractSignedBits(9, 5); - uint64_t step = instr->ExtractBit(11) ? ReadXRegister(instr->GetRm()) - : instr->ExtractSignedBits(20, 16); - index(vform, zd, start, step); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) { - case ADD_z_zz: - add(vform, zd, zn, zm); - break; - case SQADD_z_zz: - add(vform, zd, zn, zm).SignedSaturate(vform); - break; - case SQSUB_z_zz: - sub(vform, zd, zn, zm).SignedSaturate(vform); - break; - case SUB_z_zz: - sub(vform, zd, zn, zm); - break; - case UQADD_z_zz: - add(vform, zd, zn, zm).UnsignedSaturate(vform); - break; - case UQSUB_z_zz: - sub(vform, zd, zn, zm).UnsignedSaturate(vform); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIntAddSubtractVectors_Predicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - 
switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) { - case ADD_z_p_zz: - add(vform, result, zdn, zm); - break; - case SUBR_z_p_zz: - sub(vform, result, zm, zdn); - break; - case SUB_z_p_zz: - sub(vform, result, zdn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) { - case AND_z_p_zz: - SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm); - break; - case BIC_z_p_zz: - SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm); - break; - case EOR_z_p_zz: - SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm); - break; - case ORR_z_p_zz: - SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) { - case MUL_z_p_zz: - mul(vform, result, zdn, zm); - break; - case SMULH_z_p_zz: - smulh(vform, result, zdn, zm); - break; - case UMULH_z_p_zz: - umulh(vform, result, zdn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEIntMinMaxDifference_Predicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = 
ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) { - case SABD_z_p_zz: - absdiff(vform, result, zdn, zm, true); - break; - case SMAX_z_p_zz: - smax(vform, result, zdn, zm); - break; - case SMIN_z_p_zz: - smin(vform, result, zdn, zm); - break; - case UABD_z_p_zz: - absdiff(vform, result, zdn, zm, false); - break; - case UMAX_z_p_zz: - umax(vform, result, zdn, zm); - break; - case UMIN_z_p_zz: - umin(vform, result, zdn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister scratch; - - switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) { - case MUL_z_zi: - dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned()); - mul(vform, zd, zd, scratch); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); - - switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { - case SDIVR_z_p_zz: - sdiv(vform, result, zm, zdn); - break; - case SDIV_z_p_zz: - sdiv(vform, result, zdn, zm); - break; - case UDIVR_z_p_zz: - udiv(vform, result, zm, zdn); - break; - case UDIV_z_p_zz: - udiv(vform, result, zdn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zdn, pg, result); -} - -void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* 
instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister scratch; - - uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned(); - int64_t signed_imm = instr->GetImmSVEIntWideSigned(); - - switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) { - case SMAX_z_zi: - dup_immediate(vform, scratch, signed_imm); - smax(vform, zd, zd, scratch); - break; - case SMIN_z_zi: - dup_immediate(vform, scratch, signed_imm); - smin(vform, zd, zd, scratch); - break; - case UMAX_z_zi: - dup_immediate(vform, scratch, unsigned_imm); - umax(vform, zd, zd, scratch); - break; - case UMIN_z_zi: - dup_immediate(vform, scratch, unsigned_imm); - umin(vform, zd, zd, scratch); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIntCompareScalarCountAndLimit( - const Instruction* instr) { - unsigned rn_code = instr->GetRn(); - unsigned rm_code = instr->GetRm(); - SimPRegister& pd = ReadPRegister(instr->GetPd()); - VectorFormat vform = instr->GetSVEVectorFormat(); - bool is_64_bit = instr->ExtractBit(12) == 1; - int64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code); - int64_t src2 = is_64_bit ? 
ReadXRegister(rm_code) : ReadWRegister(rm_code); - - bool last = true; - for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { - bool cond = false; - switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { - case WHILELE_p_p_rr: - cond = src1 <= src2; - break; - case WHILELO_p_p_rr: - cond = static_cast<uint64_t>(src1) < static_cast<uint64_t>(src2); - break; - case WHILELS_p_p_rr: - cond = static_cast<uint64_t>(src1) <= static_cast<uint64_t>(src2); - break; - case WHILELT_p_p_rr: - cond = src1 < src2; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - last = last && cond; - LogicPRegister dst(pd); - dst.SetActive(vform, lane, last); - src1 += 1; - } - - PredTest(vform, GetPTrue(), pd); - LogSystemRegister(NZCV); -} - -void Simulator::VisitSVEConditionallyTerminateScalars( - const Instruction* instr) { - unsigned rn_code = instr->GetRn(); - unsigned rm_code = instr->GetRm(); - bool is_64_bit = instr->ExtractBit(22) == 1; - uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code); - uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code); - bool term; - switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { - case CTERMEQ_rr: - term = src1 == src2; - break; - case CTERMNE_rr: - term = src1 != src2; - break; - default: - term = false; - VIXL_UNIMPLEMENTED(); - break; - } - ReadNzcv().SetN(term ? 1 : 0); - ReadNzcv().SetV(term ? 
0 : !ReadC()); - LogSystemRegister(NZCV); -} - -void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) { - bool commute_inputs = false; - Condition cond; - switch (instr->Mask(SVEIntCompareSignedImmMask)) { - case CMPEQ_p_p_zi: - cond = eq; - break; - case CMPGE_p_p_zi: - cond = ge; - break; - case CMPGT_p_p_zi: - cond = gt; - break; - case CMPLE_p_p_zi: - cond = ge; - commute_inputs = true; - break; - case CMPLT_p_p_zi: - cond = gt; - commute_inputs = true; - break; - case CMPNE_p_p_zi: - cond = ne; - break; - default: - cond = al; - VIXL_UNIMPLEMENTED(); - break; - } - - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister src2; - dup_immediate(vform, - src2, - ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16))); - SVEIntCompareVectorsHelper(cond, - vform, - ReadPRegister(instr->GetPd()), - ReadPRegister(instr->GetPgLow8()), - commute_inputs ? src2 - : ReadVRegister(instr->GetRn()), - commute_inputs ? ReadVRegister(instr->GetRn()) - : src2); -} - -void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) { - bool commute_inputs = false; - Condition cond; - switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { - case CMPHI_p_p_zi: - cond = hi; - break; - case CMPHS_p_p_zi: - cond = hs; - break; - case CMPLO_p_p_zi: - cond = hi; - commute_inputs = true; - break; - case CMPLS_p_p_zi: - cond = hs; - commute_inputs = true; - break; - default: - cond = al; - VIXL_UNIMPLEMENTED(); - break; - } - - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister src2; - dup_immediate(vform, src2, instr->ExtractBits(20, 14)); - SVEIntCompareVectorsHelper(cond, - vform, - ReadPRegister(instr->GetPd()), - ReadPRegister(instr->GetPgLow8()), - commute_inputs ? src2 - : ReadVRegister(instr->GetRn()), - commute_inputs ? 
ReadVRegister(instr->GetRn()) - : src2); -} - -void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) { - Instr op = instr->Mask(SVEIntCompareVectorsMask); - bool is_wide_elements = false; - switch (op) { - case CMPEQ_p_p_zw: - case CMPGE_p_p_zw: - case CMPGT_p_p_zw: - case CMPHI_p_p_zw: - case CMPHS_p_p_zw: - case CMPLE_p_p_zw: - case CMPLO_p_p_zw: - case CMPLS_p_p_zw: - case CMPLT_p_p_zw: - case CMPNE_p_p_zw: - is_wide_elements = true; - break; - } - - Condition cond; - switch (op) { - case CMPEQ_p_p_zw: - case CMPEQ_p_p_zz: - cond = eq; - break; - case CMPGE_p_p_zw: - case CMPGE_p_p_zz: - cond = ge; - break; - case CMPGT_p_p_zw: - case CMPGT_p_p_zz: - cond = gt; - break; - case CMPHI_p_p_zw: - case CMPHI_p_p_zz: - cond = hi; - break; - case CMPHS_p_p_zw: - case CMPHS_p_p_zz: - cond = hs; - break; - case CMPNE_p_p_zw: - case CMPNE_p_p_zz: - cond = ne; - break; - case CMPLE_p_p_zw: - cond = le; - break; - case CMPLO_p_p_zw: - cond = lo; - break; - case CMPLS_p_p_zw: - cond = ls; - break; - case CMPLT_p_p_zw: - cond = lt; - break; - default: - VIXL_UNIMPLEMENTED(); - cond = al; - break; - } - - SVEIntCompareVectorsHelper(cond, - instr->GetSVEVectorFormat(), - ReadPRegister(instr->GetPd()), - ReadPRegister(instr->GetPgLow8()), - ReadVRegister(instr->GetRn()), - ReadVRegister(instr->GetRm()), - is_wide_elements); -} - -void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - - VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) || - (vform == kFormatVnD)); - - switch (instr->Mask(SVEFPExponentialAcceleratorMask)) { - case FEXPA_z_z: - fexpa(vform, zd, zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = 
ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - - VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) || - (vform == kFormatVnD)); - - switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) { - case FTSSEL_z_zz: - ftssel(vform, zd, zn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEConstructivePrefix_Unpredicated( - const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - - switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { - case MOVPRFX_z_z: - mov(kFormatVnD, zd, zn); // The lane size is arbitrary. - // Record the movprfx, so the next ExecuteInstruction() can check it. - movprfx_ = instr; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - - SimVRegister result; - switch (instr->Mask(SVEIntMulAddPredicatedMask)) { - case MLA_z_p_zzz: - mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm); - break; - case MLS_z_p_zzz: - mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm); - break; - case MAD_z_p_zzz: - // 'za' is encoded in 'Rn'. - mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm); - break; - case MSB_z_p_zzz: { - // 'za' is encoded in 'Rn'. 
- mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result); -} - -void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zda = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - - switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { - case SDOT_z_zzz: - sdot(vform, zda, zn, zm); - break; - case UDOT_z_zzz: - udot(vform, zda, zn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEMovprfx(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - - switch (instr->Mask(SVEMovprfxMask)) { - case MOVPRFX_z_p_z: - if (instr->ExtractBit(16)) { - mov_merging(vform, zd, pg, zn); - } else { - mov_zeroing(vform, zd, pg, zn); - } - - // Record the movprfx, so the next ExecuteInstruction() can check it. 
- movprfx_ = instr; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEIntReduction(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& vd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) { - switch (instr->Mask(SVEIntReductionLogicalMask)) { - case ANDV_r_p_z: - andv(vform, vd, pg, zn); - break; - case EORV_r_p_z: - eorv(vform, vd, pg, zn); - break; - case ORV_r_p_z: - orv(vform, vd, pg, zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - } else { - switch (instr->Mask(SVEIntReductionMask)) { - case SADDV_r_p_z: - saddv(vform, vd, pg, zn); - break; - case SMAXV_r_p_z: - smaxv(vform, vd, pg, zn); - break; - case SMINV_r_p_z: - sminv(vform, vd, pg, zn); - break; - case UADDV_r_p_z: - uaddv(vform, vd, pg, zn); - break; - case UMAXV_r_p_z: - umaxv(vform, vd, pg, zn); - break; - case UMINV_r_p_z: - uminv(vform, vd, pg, zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - } -} - -void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - - SimVRegister result; - switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) { - case ABS_z_p_z: - abs(vform, result, zn); - break; - case CLS_z_p_z: - cls(vform, result, zn); - break; - case CLZ_z_p_z: - clz(vform, result, zn); - break; - case CNOT_z_p_z: - cnot(vform, result, zn); - break; - case CNT_z_p_z: - cnt(vform, result, zn); - break; - case FABS_z_p_z: - fabs_(vform, result, zn); - break; - case FNEG_z_p_z: - fneg(vform, result, zn); - break; - case NEG_z_p_z: - neg(vform, result, zn); - break; - case NOT_z_p_z: - not_(vform, result, zn); - break; - case SXTB_z_p_z: - case SXTH_z_p_z: - case SXTW_z_p_z: - sxt(vform, result, 
zn, (kBitsPerByte << instr->ExtractBits(18, 17))); - break; - case UXTB_z_p_z: - case UXTH_z_p_z: - case UXTW_z_p_z: - uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17))); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - mov_merging(vform, zd, pg, result); -} - -void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) { - // There is only one instruction in this group. - VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i); - - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16)); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - - SimVRegister result; - switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { - case FCPY_z_p_i: { - int imm8 = instr->ExtractBits(12, 5); - uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), - Instruction::Imm8ToFP64(imm8)); - dup_immediate(vform, result, value); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } - mov_merging(vform, zd, pg, result); -} - -void Simulator::VisitSVEIntAddSubtractImm_Unpredicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister scratch; - - uint64_t imm = instr->GetImmSVEIntWideUnsigned(); - imm <<= instr->ExtractBit(13) * 8; - - switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { - case ADD_z_zi: - add_uint(vform, zd, zd, imm); - break; - case SQADD_z_zi: - add_uint(vform, zd, zd, imm).SignedSaturate(vform); - break; - case SQSUB_z_zi: - sub_uint(vform, zd, zd, imm).SignedSaturate(vform); - break; - case SUBR_z_zi: - dup_immediate(vform, scratch, imm); - sub(vform, zd, scratch, zd); - break; - case SUB_z_zi: - sub_uint(vform, zd, zd, imm); - break; - case UQADD_z_zi: - add_uint(vform, zd, zd, imm).UnsignedSaturate(vform); - break; - case 
UQSUB_z_zi: - sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform); - break; - default: - break; - } -} - -void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - - VectorFormat format = instr->GetSVEVectorFormat(); - int64_t imm = instr->GetImmSVEIntWideSigned(); - int shift = instr->ExtractBit(13) * 8; - imm *= 1 << shift; - - switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) { - case DUP_z_i: - // The encoding of byte-sized lanes with lsl #8 is undefined. - if ((format == kFormatVnB) && (shift == 8)) { - VIXL_UNIMPLEMENTED(); - } else { - dup_immediate(format, zd, imm); - } - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - - switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { - case FDUP_z_i: - switch (vform) { - case kFormatVnH: - dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16())); - break; - case kFormatVnS: - dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32())); - break; - case kFormatVnD: - dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64())); - break; - default: - VIXL_UNIMPLEMENTED(); - } - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( - const Instruction* instr) { - switch (instr->Mask( - SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) { - case LD1H_z_p_bz_s_x32_scaled: - case LD1SH_z_p_bz_s_x32_scaled: - case LDFF1H_z_p_bz_s_x32_scaled: - case LDFF1SH_z_p_bz_s_x32_scaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? 
SVE_SXTW : SVE_UXTW; - SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); -} - -void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( - const Instruction* instr) { - switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_s_x32_unscaled: - case LD1H_z_p_bz_s_x32_unscaled: - case LD1SB_z_p_bz_s_x32_unscaled: - case LD1SH_z_p_bz_s_x32_unscaled: - case LD1W_z_p_bz_s_x32_unscaled: - case LDFF1B_z_p_bz_s_x32_unscaled: - case LDFF1H_z_p_bz_s_x32_unscaled: - case LDFF1SB_z_p_bz_s_x32_unscaled: - case LDFF1SH_z_p_bz_s_x32_unscaled: - case LDFF1W_z_p_bz_s_x32_unscaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; - SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); -} - -void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) { - case LD1B_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LD1H_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LD1SB_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LD1SH_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LD1W_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LDFF1B_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LDFF1H_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LDFF1SB_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LDFF1SH_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - case LDFF1W_z_p_ai_s: - VIXL_UNIMPLEMENTED(); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( - const Instruction* instr) { - switch ( - instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) { - case LD1W_z_p_bz_s_x32_scaled: - case LDFF1W_z_p_bz_s_x32_scaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? 
SVE_SXTW : SVE_UXTW; - SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); -} - -void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( - const Instruction* instr) { - switch ( - instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) { - // Ignore prefetch hint instructions. - case PRFB_i_p_bz_s_x32_scaled: - case PRFD_i_p_bz_s_x32_scaled: - case PRFH_i_p_bz_s_x32_scaled: - case PRFW_i_p_bz_s_x32_scaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) { - // Ignore prefetch hint instructions. - case PRFB_i_p_ai_s: - case PRFD_i_p_ai_s: - case PRFH_i_p_ai_s: - case PRFW_i_p_ai_s: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) { - // Ignore prefetch hint instructions. - case PRFB_i_p_bi_s: - case PRFD_i_p_bi_s: - case PRFH_i_p_bi_s: - case PRFW_i_p_bi_s: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar( - const Instruction* instr) { - switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) { - // Ignore prefetch hint instructions. 
- case PRFB_i_p_br_s: - case PRFD_i_p_br_s: - case PRFH_i_p_br_s: - case PRFW_i_p_br_s: - if (instr->GetRm() == kZeroRegCode) { - VIXL_UNIMPLEMENTED(); - } - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) { - bool is_signed; - switch (instr->Mask(SVELoadAndBroadcastElementMask)) { - case LD1RB_z_p_bi_u8: - case LD1RB_z_p_bi_u16: - case LD1RB_z_p_bi_u32: - case LD1RB_z_p_bi_u64: - case LD1RH_z_p_bi_u16: - case LD1RH_z_p_bi_u32: - case LD1RH_z_p_bi_u64: - case LD1RW_z_p_bi_u32: - case LD1RW_z_p_bi_u64: - case LD1RD_z_p_bi_u64: - is_signed = false; - break; - case LD1RSB_z_p_bi_s16: - case LD1RSB_z_p_bi_s32: - case LD1RSB_z_p_bi_s64: - case LD1RSH_z_p_bi_s32: - case LD1RSH_z_p_bi_s64: - case LD1RSW_z_p_bi_s64: - is_signed = true; - break; - default: - // This encoding group is complete, so no other values should be possible. - VIXL_UNREACHABLE(); - is_signed = false; - break; - } - - int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); - int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13); - VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); - uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2; - uint64_t base = ReadXRegister(instr->GetRn()) + offset; - VectorFormat unpack_vform = - SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); - SimVRegister temp; - ld1r(vform, unpack_vform, temp, base, is_signed); - mov_zeroing(vform, - ReadVRegister(instr->GetRt()), - ReadPRegister(instr->GetPgLow8()), - temp); -} - -void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) { - switch (instr->Mask(SVELoadPredicateRegisterMask)) { - case LDR_p_bi: { - SimPRegister& pt = ReadPRegister(instr->GetPt()); - int pl = GetPredicateLengthInBytes(); - int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); - uint64_t multiplier = 
ExtractSignedBitfield64(8, 0, imm9); - uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl; - for (int i = 0; i < pl; i++) { - pt.Insert(i, Memory::Read<uint8_t>(address + i)); - } - LogPRead(instr->GetPt(), address); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) { - switch (instr->Mask(SVELoadVectorRegisterMask)) { - case LDR_z_bi: { - SimVRegister& zt = ReadVRegister(instr->GetRt()); - int vl = GetVectorLengthInBytes(); - int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); - uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); - uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl; - for (int i = 0; i < vl; i++) { - zt.Insert(i, Memory::Read<uint8_t>(address + i)); - } - LogZRead(instr->GetRt(), address); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( - const Instruction* instr) { - switch (instr->Mask( - SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { - case LD1D_z_p_bz_d_x32_scaled: - case LD1H_z_p_bz_d_x32_scaled: - case LD1SH_z_p_bz_d_x32_scaled: - case LD1SW_z_p_bz_d_x32_scaled: - case LD1W_z_p_bz_d_x32_scaled: - case LDFF1H_z_p_bz_d_x32_scaled: - case LDFF1W_z_p_bz_d_x32_scaled: - case LDFF1D_z_p_bz_d_x32_scaled: - case LDFF1SH_z_p_bz_d_x32_scaled: - case LDFF1SW_z_p_bz_d_x32_scaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? 
SVE_SXTW : SVE_UXTW; - SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod); -} - -void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( - const Instruction* instr) { - switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { - case LD1D_z_p_bz_d_64_scaled: - case LD1H_z_p_bz_d_64_scaled: - case LD1SH_z_p_bz_d_64_scaled: - case LD1SW_z_p_bz_d_64_scaled: - case LD1W_z_p_bz_d_64_scaled: - case LDFF1H_z_p_bz_d_64_scaled: - case LDFF1W_z_p_bz_d_64_scaled: - case LDFF1D_z_p_bz_d_64_scaled: - case LDFF1SH_z_p_bz_d_64_scaled: - case LDFF1SW_z_p_bz_d_64_scaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL); -} - -void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( - const Instruction* instr) { - switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_d_64_unscaled: - case LD1D_z_p_bz_d_64_unscaled: - case LD1H_z_p_bz_d_64_unscaled: - case LD1SB_z_p_bz_d_64_unscaled: - case LD1SH_z_p_bz_d_64_unscaled: - case LD1SW_z_p_bz_d_64_unscaled: - case LD1W_z_p_bz_d_64_unscaled: - case LDFF1B_z_p_bz_d_64_unscaled: - case LDFF1D_z_p_bz_d_64_unscaled: - case LDFF1H_z_p_bz_d_64_unscaled: - case LDFF1SB_z_p_bz_d_64_unscaled: - case LDFF1SH_z_p_bz_d_64_unscaled: - case LDFF1SW_z_p_bz_d_64_unscaled: - case LDFF1W_z_p_bz_d_64_unscaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SVEGatherLoadScalarPlusVectorHelper(instr, - kFormatVnD, - NO_SVE_OFFSET_MODIFIER); -} - -void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( - const Instruction* instr) { - switch (instr->Mask( - SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { - case LD1B_z_p_bz_d_x32_unscaled: - case LD1D_z_p_bz_d_x32_unscaled: - case LD1H_z_p_bz_d_x32_unscaled: - case LD1SB_z_p_bz_d_x32_unscaled: - case LD1SH_z_p_bz_d_x32_unscaled: - case LD1SW_z_p_bz_d_x32_unscaled: - case 
LD1W_z_p_bz_d_x32_unscaled: - case LDFF1B_z_p_bz_d_x32_unscaled: - case LDFF1H_z_p_bz_d_x32_unscaled: - case LDFF1W_z_p_bz_d_x32_unscaled: - case LDFF1D_z_p_bz_d_x32_unscaled: - case LDFF1SB_z_p_bz_d_x32_unscaled: - case LDFF1SH_z_p_bz_d_x32_unscaled: - case LDFF1SW_z_p_bz_d_x32_unscaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; - SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod); -} - -void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) { - case LD1B_z_p_ai_d: - case LD1D_z_p_ai_d: - case LD1H_z_p_ai_d: - case LD1SB_z_p_ai_d: - case LD1SH_z_p_ai_d: - case LD1SW_z_p_ai_d: - case LD1W_z_p_ai_d: - case LDFF1B_z_p_ai_d: - case LDFF1D_z_p_ai_d: - case LDFF1H_z_p_ai_d: - case LDFF1SB_z_p_ai_d: - case LDFF1SH_z_p_ai_d: - case LDFF1SW_z_p_ai_d: - case LDFF1W_z_p_ai_d: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - bool is_signed = instr->ExtractBit(14) == 0; - bool is_ff = instr->ExtractBit(13) == 1; - // Note that these instructions don't use the Dtype encoding. - int msize_in_bytes_log2 = instr->ExtractBits(24, 23); - uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2; - LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - if (is_ff) { - VIXL_UNIMPLEMENTED(); - } else { - SVEStructuredLoadHelper(kFormatVnD, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - is_signed); - } -} - -void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( - const Instruction* instr) { - switch ( - instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) { - // Ignore prefetch hint instructions. 
- case PRFB_i_p_bz_d_64_scaled: - case PRFD_i_p_bz_d_64_scaled: - case PRFH_i_p_bz_d_64_scaled: - case PRFW_i_p_bz_d_64_scaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator:: - VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( - const Instruction* instr) { - switch (instr->Mask( - SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) { - // Ignore prefetch hint instructions. - case PRFB_i_p_bz_d_x32_scaled: - case PRFD_i_p_bz_d_x32_scaled: - case PRFH_i_p_bz_d_x32_scaled: - case PRFW_i_p_bz_d_x32_scaled: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) { - // Ignore prefetch hint instructions. - case PRFB_i_p_ai_d: - case PRFD_i_p_ai_d: - case PRFH_i_p_ai_d: - case PRFW_i_p_ai_d: - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( - const Instruction* instr) { - bool is_signed; - switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { - case LDFF1B_z_p_br_u8: - case LDFF1B_z_p_br_u16: - case LDFF1B_z_p_br_u32: - case LDFF1B_z_p_br_u64: - case LDFF1H_z_p_br_u16: - case LDFF1H_z_p_br_u32: - case LDFF1H_z_p_br_u64: - case LDFF1W_z_p_br_u32: - case LDFF1W_z_p_br_u64: - case LDFF1D_z_p_br_u64: - is_signed = false; - break; - case LDFF1SB_z_p_br_s16: - case LDFF1SB_z_p_br_s32: - case LDFF1SB_z_p_br_s64: - case LDFF1SH_z_p_br_s32: - case LDFF1SH_z_p_br_s64: - case LDFF1SW_z_p_br_s64: - is_signed = true; - break; - default: - // This encoding group is complete, so no other values should be possible. 
- VIXL_UNREACHABLE(); - is_signed = false; - break; - } - - int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); - int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); - VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); - uint64_t offset = ReadXRegister(instr->GetRm()); - offset <<= msize_in_bytes_log2; - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEFaultTolerantLoadHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - kSVEFirstFaultLoad, - is_signed); -} - -void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( - const Instruction* instr) { - bool is_signed = false; - switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) { - case LDNF1B_z_p_bi_u16: - case LDNF1B_z_p_bi_u32: - case LDNF1B_z_p_bi_u64: - case LDNF1B_z_p_bi_u8: - case LDNF1D_z_p_bi_u64: - case LDNF1H_z_p_bi_u16: - case LDNF1H_z_p_bi_u32: - case LDNF1H_z_p_bi_u64: - case LDNF1W_z_p_bi_u32: - case LDNF1W_z_p_bi_u64: - break; - case LDNF1SB_z_p_bi_s16: - case LDNF1SB_z_p_bi_s32: - case LDNF1SB_z_p_bi_s64: - case LDNF1SH_z_p_bi_s32: - case LDNF1SH_z_p_bi_s64: - case LDNF1SW_z_p_bi_s64: - is_signed = true; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); - int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); - VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); - int vl = GetVectorLengthInBytes(); - int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; - uint64_t offset = - (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEFaultTolerantLoadHelper(vform, - 
ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - kSVENonFaultLoad, - is_signed); -} - -void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( - const Instruction* instr) { - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = kFormatUndefined; - - switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) { - case LDNT1B_z_p_bi_contiguous: - vform = kFormatVnB; - break; - case LDNT1D_z_p_bi_contiguous: - vform = kFormatVnD; - break; - case LDNT1H_z_p_bi_contiguous: - vform = kFormatVnH; - break; - case LDNT1W_z_p_bi_contiguous: - vform = kFormatVnS; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); - int vl = GetVectorLengthInBytes(); - uint64_t offset = instr->ExtractSignedBits(19, 16) * vl; - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredLoadHelper(vform, - pg, - instr->GetRt(), - addr, - /* is_signed = */ false); -} - -void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( - const Instruction* instr) { - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = kFormatUndefined; - - switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) { - case LDNT1B_z_p_br_contiguous: - vform = kFormatVnB; - break; - case LDNT1D_z_p_br_contiguous: - vform = kFormatVnD; - break; - case LDNT1H_z_p_br_contiguous: - vform = kFormatVnH; - break; - case LDNT1W_z_p_br_contiguous: - vform = kFormatVnS; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); - uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2; - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredLoadHelper(vform, - pg, - instr->GetRt(), - addr, - /* is_signed = */ false); -} - -void 
Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( - const Instruction* instr) { - SimVRegister& zt = ReadVRegister(instr->GetRt()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); - uint64_t offset = instr->ExtractSignedBits(19, 16) * 16; - - VectorFormat vform = kFormatUndefined; - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { - case LD1RQB_z_p_bi_u8: - vform = kFormatVnB; - break; - case LD1RQD_z_p_bi_u64: - vform = kFormatVnD; - break; - case LD1RQH_z_p_bi_u16: - vform = kFormatVnH; - break; - case LD1RQW_z_p_bi_u32: - vform = kFormatVnS; - break; - default: - addr = offset = 0; - break; - } - ld1(kFormat16B, zt, addr + offset); - mov_zeroing(vform, zt, pg, zt); - dup_element(kFormatVnQ, zt, zt, 0); -} - -void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( - const Instruction* instr) { - SimVRegister& zt = ReadVRegister(instr->GetRt()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); - uint64_t offset = ReadXRegister(instr->GetRm()); - - VectorFormat vform = kFormatUndefined; - switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { - case LD1RQB_z_p_br_contiguous: - vform = kFormatVnB; - break; - case LD1RQD_z_p_br_contiguous: - vform = kFormatVnD; - offset <<= 3; - break; - case LD1RQH_z_p_br_contiguous: - vform = kFormatVnH; - offset <<= 1; - break; - case LD1RQW_z_p_br_contiguous: - vform = kFormatVnS; - offset <<= 2; - break; - default: - addr = offset = 0; - break; - } - ld1(kFormat16B, zt, addr + offset); - mov_zeroing(vform, zt, pg, zt); - dup_element(kFormatVnQ, zt, zt, 0); -} - -void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) { - case LD2B_z_p_bi_contiguous: - case LD2D_z_p_bi_contiguous: - case LD2H_z_p_bi_contiguous: - 
case LD2W_z_p_bi_contiguous: - case LD3B_z_p_bi_contiguous: - case LD3D_z_p_bi_contiguous: - case LD3H_z_p_bi_contiguous: - case LD3W_z_p_bi_contiguous: - case LD4B_z_p_bi_contiguous: - case LD4D_z_p_bi_contiguous: - case LD4H_z_p_bi_contiguous: - case LD4W_z_p_bi_contiguous: { - int vl = GetVectorLengthInBytes(); - int msz = instr->ExtractBits(24, 23); - int reg_count = instr->ExtractBits(22, 21) + 1; - uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count; - LogicSVEAddressVector addr( - ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); - addr.SetMsizeInBytesLog2(msz); - addr.SetRegCount(reg_count); - SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz), - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar( - const Instruction* instr) { - switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) { - case LD2B_z_p_br_contiguous: - case LD2D_z_p_br_contiguous: - case LD2H_z_p_br_contiguous: - case LD2W_z_p_br_contiguous: - case LD3B_z_p_br_contiguous: - case LD3D_z_p_br_contiguous: - case LD3H_z_p_br_contiguous: - case LD3W_z_p_br_contiguous: - case LD4B_z_p_br_contiguous: - case LD4D_z_p_br_contiguous: - case LD4H_z_p_br_contiguous: - case LD4W_z_p_br_contiguous: { - int msz = instr->ExtractBits(24, 23); - uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); - LogicSVEAddressVector addr( - ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); - addr.SetMsizeInBytesLog2(msz); - addr.SetRegCount(instr->ExtractBits(22, 21) + 1); - SVEStructuredLoadHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - false); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( - const Instruction* instr) { - 
switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) { - case ST1H_z_p_bz_s_x32_scaled: - case ST1W_z_p_bz_s_x32_scaled: { - unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); - VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); - int scale = instr->ExtractBit(21) * msize_in_bytes_log2; - uint64_t base = ReadXRegister(instr->GetRn()); - SVEOffsetModifier mod = - (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW; - LogicSVEAddressVector addr(base, - &ReadVRegister(instr->GetRm()), - kFormatVnS, - mod, - scale); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(kFormatVnS, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( - const Instruction* instr) { - switch ( - instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_s_x32_unscaled: - case ST1H_z_p_bz_s_x32_unscaled: - case ST1W_z_p_bz_s_x32_unscaled: { - unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); - VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); - uint64_t base = ReadXRegister(instr->GetRn()); - SVEOffsetModifier mod = - (instr->ExtractBit(14) == 1) ? 
SVE_SXTW : SVE_UXTW; - LogicSVEAddressVector addr(base, - &ReadVRegister(instr->GetRm()), - kFormatVnS, - mod); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(kFormatVnS, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE32BitScatterStore_VectorPlusImm( - const Instruction* instr) { - int msz = 0; - switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) { - case ST1B_z_p_ai_s: - msz = 0; - break; - case ST1H_z_p_ai_s: - msz = 1; - break; - case ST1W_z_p_ai_s: - msz = 2; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - uint64_t imm = instr->ExtractBits(20, 16) << msz; - LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS); - addr.SetMsizeInBytesLog2(msz); - SVEStructuredStoreHelper(kFormatVnS, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); -} - -void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( - const Instruction* instr) { - switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) { - case ST1D_z_p_bz_d_64_scaled: - case ST1H_z_p_bz_d_64_scaled: - case ST1W_z_p_bz_d_64_scaled: { - unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); - VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); - int scale = instr->ExtractBit(21) * msize_in_bytes_log2; - uint64_t base = ReadXRegister(instr->GetRn()); - LogicSVEAddressVector addr(base, - &ReadVRegister(instr->GetRm()), - kFormatVnD, - SVE_LSL, - scale); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(kFormatVnD, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( - const Instruction* instr) { - switch ( - instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) { - case 
ST1B_z_p_bz_d_64_unscaled: - case ST1D_z_p_bz_d_64_unscaled: - case ST1H_z_p_bz_d_64_unscaled: - case ST1W_z_p_bz_d_64_unscaled: { - unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); - VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); - uint64_t base = ReadXRegister(instr->GetRn()); - LogicSVEAddressVector addr(base, - &ReadVRegister(instr->GetRm()), - kFormatVnD, - NO_SVE_OFFSET_MODIFIER); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(kFormatVnD, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( - const Instruction* instr) { - switch (instr->Mask( - SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) { - case ST1D_z_p_bz_d_x32_scaled: - case ST1H_z_p_bz_d_x32_scaled: - case ST1W_z_p_bz_d_x32_scaled: { - unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); - VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); - int scale = instr->ExtractBit(21) * msize_in_bytes_log2; - uint64_t base = ReadXRegister(instr->GetRn()); - SVEOffsetModifier mod = - (instr->ExtractBit(14) == 1) ? 
SVE_SXTW : SVE_UXTW; - LogicSVEAddressVector addr(base, - &ReadVRegister(instr->GetRm()), - kFormatVnD, - mod, - scale); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(kFormatVnD, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator:: - VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( - const Instruction* instr) { - switch (instr->Mask( - SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { - case ST1B_z_p_bz_d_x32_unscaled: - case ST1D_z_p_bz_d_x32_unscaled: - case ST1H_z_p_bz_d_x32_unscaled: - case ST1W_z_p_bz_d_x32_unscaled: { - unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); - VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); - uint64_t base = ReadXRegister(instr->GetRn()); - SVEOffsetModifier mod = - (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW; - LogicSVEAddressVector addr(base, - &ReadVRegister(instr->GetRm()), - kFormatVnD, - mod); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(kFormatVnD, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVE64BitScatterStore_VectorPlusImm( - const Instruction* instr) { - int msz = 0; - switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) { - case ST1B_z_p_ai_d: - msz = 0; - break; - case ST1D_z_p_ai_d: - msz = 3; - break; - case ST1H_z_p_ai_d: - msz = 1; - break; - case ST1W_z_p_ai_d: - msz = 2; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - uint64_t imm = instr->ExtractBits(20, 16) << msz; - LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD); - addr.SetMsizeInBytesLog2(msz); - SVEStructuredStoreHelper(kFormatVnD, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); -} - -void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( - const 
Instruction* instr) { - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = kFormatUndefined; - - switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) { - case STNT1B_z_p_bi_contiguous: - vform = kFormatVnB; - break; - case STNT1D_z_p_bi_contiguous: - vform = kFormatVnD; - break; - case STNT1H_z_p_bi_contiguous: - vform = kFormatVnH; - break; - case STNT1W_z_p_bi_contiguous: - vform = kFormatVnS; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); - int vl = GetVectorLengthInBytes(); - uint64_t offset = instr->ExtractSignedBits(19, 16) * vl; - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr); -} - -void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( - const Instruction* instr) { - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - VectorFormat vform = kFormatUndefined; - - switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) { - case STNT1B_z_p_br_contiguous: - vform = kFormatVnB; - break; - case STNT1D_z_p_br_contiguous: - vform = kFormatVnD; - break; - case STNT1H_z_p_br_contiguous: - vform = kFormatVnH; - break; - case STNT1W_z_p_br_contiguous: - vform = kFormatVnS; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); - uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2; - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr); -} - -void Simulator::VisitSVEContiguousStore_ScalarPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) { - case ST1B_z_p_bi: - case ST1D_z_p_bi: - case ST1H_z_p_bi: - case ST1W_z_p_bi: { - int vl = 
GetVectorLengthInBytes(); - int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); - int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false); - VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); - int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; - uint64_t offset = - (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); - VectorFormat vform = - SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredStoreHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEContiguousStore_ScalarPlusScalar( - const Instruction* instr) { - switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) { - case ST1B_z_p_br: - case ST1D_z_p_br: - case ST1H_z_p_br: - case ST1W_z_p_br: { - uint64_t offset = ReadXRegister(instr->GetRm()); - offset <<= instr->ExtractBits(24, 23); - VectorFormat vform = - SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21)); - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23)); - SVEStructuredStoreHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister z_result; - - switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { - case CPY_z_p_v: - dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0); - mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void 
Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm( - const Instruction* instr) { - switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) { - case ST2B_z_p_bi_contiguous: - case ST2D_z_p_bi_contiguous: - case ST2H_z_p_bi_contiguous: - case ST2W_z_p_bi_contiguous: - case ST3B_z_p_bi_contiguous: - case ST3D_z_p_bi_contiguous: - case ST3H_z_p_bi_contiguous: - case ST3W_z_p_bi_contiguous: - case ST4B_z_p_bi_contiguous: - case ST4D_z_p_bi_contiguous: - case ST4H_z_p_bi_contiguous: - case ST4W_z_p_bi_contiguous: { - int vl = GetVectorLengthInBytes(); - int msz = instr->ExtractBits(24, 23); - int reg_count = instr->ExtractBits(22, 21) + 1; - uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count; - LogicSVEAddressVector addr( - ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); - addr.SetMsizeInBytesLog2(msz); - addr.SetRegCount(reg_count); - SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz), - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar( - const Instruction* instr) { - switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) { - case ST2B_z_p_br_contiguous: - case ST2D_z_p_br_contiguous: - case ST2H_z_p_br_contiguous: - case ST2W_z_p_br_contiguous: - case ST3B_z_p_br_contiguous: - case ST3D_z_p_br_contiguous: - case ST3H_z_p_br_contiguous: - case ST3W_z_p_br_contiguous: - case ST4B_z_p_br_contiguous: - case ST4D_z_p_br_contiguous: - case ST4H_z_p_br_contiguous: - case ST4W_z_p_br_contiguous: { - int msz = instr->ExtractBits(24, 23); - uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); - LogicSVEAddressVector addr( - ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); - addr.SetMsizeInBytesLog2(msz); - addr.SetRegCount(instr->ExtractBits(22, 21) + 1); - 
SVEStructuredStoreHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) { - switch (instr->Mask(SVEStorePredicateRegisterMask)) { - case STR_p_bi: { - SimPRegister& pt = ReadPRegister(instr->GetPt()); - int pl = GetPredicateLengthInBytes(); - int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); - uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); - uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl; - for (int i = 0; i < pl; i++) { - Memory::Write(address + i, pt.GetLane<uint8_t>(i)); - } - LogPWrite(instr->GetPt(), address); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) { - switch (instr->Mask(SVEStoreVectorRegisterMask)) { - case STR_z_bi: { - SimVRegister& zt = ReadVRegister(instr->GetRt()); - int vl = GetVectorLengthInBytes(); - int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); - uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); - uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl; - for (int i = 0; i < vl; i++) { - Memory::Write(address + i, zt.GetLane<uint8_t>(i)); - } - LogZWrite(instr->GetRt(), address); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEMulIndex(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zda = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - - switch (instr->Mask(SVEMulIndexMask)) { - case SDOT_z_zzzi_d: - sdot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(19, 16)), - instr->ExtractBit(20)); - break; - case SDOT_z_zzzi_s: - sdot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(18, 16)), - instr->ExtractBits(20, 19)); - break; - case UDOT_z_zzzi_d: - 
udot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(19, 16)), - instr->ExtractBit(20)); - break; - case UDOT_z_zzzi_s: - udot(vform, - zda, - zn, - ReadVRegister(instr->ExtractBits(18, 16)), - instr->ExtractBits(20, 19)); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) { - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - SimPRegister result; - - switch (instr->Mask(SVEPartitionBreakConditionMask)) { - case BRKAS_p_p_p_z: - case BRKA_p_p_p: - brka(result, pg, pn); - break; - case BRKBS_p_p_p_z: - case BRKB_p_p_p: - brkb(result, pg, pn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (instr->ExtractBit(4) == 1) { - mov_merging(pd, pg, result); - } else { - mov_zeroing(pd, pg, result); - } - - // Set flag if needed. - if (instr->ExtractBit(22) == 1) { - PredTest(kFormatVnB, pg, pd); - } -} - -void Simulator::VisitSVEPropagateBreakToNextPartition( - const Instruction* instr) { - SimPRegister& pdm = ReadPRegister(instr->GetPd()); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - - switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) { - case BRKNS_p_p_pp: - case BRKN_p_p_pp: - brkn(pdm, pg, pn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - // Set flag if needed. - if (instr->ExtractBit(22) == 1) { - // Note that this ignores `pg`. 
- PredTest(kFormatVnB, GetPTrue(), pdm); - } -} - -void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) { - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - - SimVRegister temp = Simulator::ExpandToSimVRegister(pn); - SimVRegister zero; - dup_immediate(kFormatVnB, zero, 0); - - switch (instr->Mask(SVEUnpackPredicateElementsMask)) { - case PUNPKHI_p_p: - zip2(kFormatVnB, temp, temp, zero); - break; - case PUNPKLO_p_p: - zip1(kFormatVnB, temp, temp, zero); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp); -} - -void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - SimPRegister& pm = ReadPRegister(instr->GetPm()); - - SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn); - SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm); - - switch (instr->Mask(SVEPermutePredicateElementsMask)) { - case TRN1_p_pp: - trn1(vform, temp0, temp0, temp1); - break; - case TRN2_p_pp: - trn2(vform, temp0, temp0, temp1); - break; - case UZP1_p_pp: - uzp1(vform, temp0, temp0, temp1); - break; - case UZP2_p_pp: - uzp2(vform, temp0, temp0, temp1); - break; - case ZIP1_p_pp: - zip1(vform, temp0, temp0, temp1); - break; - case ZIP2_p_pp: - zip2(vform, temp0, temp0, temp1); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0); -} - -void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) { - switch (instr->Mask(SVEReversePredicateElementsMask)) { - case REV_p_p: { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimVRegister temp = Simulator::ExpandToSimVRegister(pn); - rev(vform, 
temp, temp); - Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) { - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - // Second source register "Zm" is encoded where "Zn" would usually be. - SimVRegister& zm = ReadVRegister(instr->GetRn()); - - const int imm8h_mask = 0x001F0000; - const int imm8l_mask = 0x00001C00; - int index = instr->ExtractBits<imm8h_mask | imm8l_mask>(); - int vl = GetVectorLengthInBytes(); - index = (index >= vl) ? 0 : index; - - switch (instr->Mask(SVEPermuteVectorExtractMask)) { - case EXT_z_zi_des: - ext(kFormatVnB, zdn, zdn, zm, index); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - - switch (instr->Mask(SVEPermuteVectorInterleavingMask)) { - case TRN1_z_zz: - trn1(vform, zd, zn, zm); - break; - case TRN2_z_zz: - trn2(vform, zd, zn, zm); - break; - case UZP1_z_zz: - uzp1(vform, zd, zn, zm); - break; - case UZP2_z_zz: - uzp2(vform, zd, zn, zm); - break; - case ZIP1_z_zz: - zip1(vform, zd, zn, zm); - break; - case ZIP2_z_zz: - zip2(vform, zd, zn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEConditionallyBroadcastElementToVector( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - int active_offset = -1; - switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) { - case CLASTA_z_p_zz: - active_offset = 1; - break; - case CLASTB_z_p_zz: - 
active_offset = 0; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (active_offset >= 0) { - std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); - if (value.first) { - dup_immediate(vform, zdn, value.second); - } else { - // Trigger a line of trace for the operation, even though it doesn't - // change the register value. - mov(vform, zdn, zdn); - } - } -} - -void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& vdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - int active_offset = -1; - switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) { - case CLASTA_v_p_z: - active_offset = 1; - break; - case CLASTB_v_p_z: - active_offset = 0; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (active_offset >= 0) { - LogicVRegister dst(vdn); - uint64_t src1_value = dst.Uint(vform, 0); - std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset); - dup_immediate(vform, vdn, 0); - dst.SetUint(vform, 0, src2_value.first ? 
src2_value.second : src1_value); - } -} - -void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - int active_offset = -1; - switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) { - case CLASTA_r_p_z: - active_offset = 1; - break; - case CLASTB_r_p_z: - active_offset = 0; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (active_offset >= 0) { - std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); - uint64_t masked_src = ReadXRegister(instr->GetRd()) & - GetUintMask(LaneSizeInBitsFromFormat(vform)); - WriteXRegister(instr->GetRd(), value.first ? value.second : masked_src); - } -} - -void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& vdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - int active_offset = -1; - switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) { - case LASTA_v_p_z: - active_offset = 1; - break; - case LASTB_v_p_z: - active_offset = 0; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (active_offset >= 0) { - LogicVRegister dst(vdn); - std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); - dup_immediate(vform, vdn, 0); - dst.SetUint(vform, 0, value.second); - } -} - -void Simulator::VisitSVEExtractElementToGeneralRegister( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - int active_offset = -1; - switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) { - case LASTA_r_p_z: - active_offset = 1; - 
break; - case LASTB_r_p_z: - active_offset = 0; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (active_offset >= 0) { - std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); - WriteXRegister(instr->GetRd(), value.second); - } -} - -void Simulator::VisitSVECompressActiveElements(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - switch (instr->Mask(SVECompressActiveElementsMask)) { - case COMPACT_z_p_z: - compact(vform, zd, pg, zn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated( - const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister z_result; - - switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { - case CPY_z_p_r: - dup_immediate(vform, - z_result, - ReadXRegister(instr->GetRn(), Reg31IsStackPointer)); - mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16)); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - - SimVRegister result; - switch (instr->Mask(SVECopyIntImm_PredicatedMask)) { - case CPY_z_p_i: { - // Use unsigned arithmetic to avoid undefined behaviour during the shift. 
- uint64_t imm8 = instr->GetImmSVEIntWideSigned(); - dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8)); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (instr->ExtractBit(14) != 0) { - mov_merging(vform, zd, pg, result); - } else { - mov_zeroing(vform, zd, pg, result); - } -} - -void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - SimVRegister result; - - // In NEON, the chunk size in which elements are REVersed is in the - // instruction mnemonic, and the element size attached to the register. - // SVE reverses the semantics; the mapping to logic functions below is to - // account for this. - VectorFormat chunk_form = instr->GetSVEVectorFormat(); - VectorFormat element_form = kFormatUndefined; - - switch (instr->Mask(SVEReverseWithinElementsMask)) { - case RBIT_z_p_z: - rbit(chunk_form, result, zn); - break; - case REVB_z_z: - VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) || - (chunk_form == kFormatVnD)); - element_form = kFormatVnB; - break; - case REVH_z_z: - VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD)); - element_form = kFormatVnH; - break; - case REVW_z_z: - VIXL_ASSERT(chunk_form == kFormatVnD); - element_form = kFormatVnS; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) { - VIXL_ASSERT(element_form != kFormatUndefined); - switch (chunk_form) { - case kFormatVnH: - rev16(element_form, result, zn); - break; - case kFormatVnS: - rev32(element_form, result, zn); - break; - case kFormatVnD: - rev64(element_form, result, zn); - break; - default: - VIXL_UNIMPLEMENTED(); - } - } - - mov_merging(chunk_form, zd, pg, result); -} - -void Simulator::VisitSVEVectorSplice_Destructive(const Instruction* instr) { - VectorFormat vform = 
instr->GetSVEVectorFormat(); - SimVRegister& zdn = ReadVRegister(instr->GetRd()); - SimVRegister& zm = ReadVRegister(instr->GetRn()); - SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); - - switch (instr->Mask(SVEVectorSplice_DestructiveMask)) { - case SPLICE_z_p_zz_des: - splice(vform, zdn, pg, zdn, zm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) { - case DUP_z_r: - dup_immediate(instr->GetSVEVectorFormat(), - zd, - ReadXRegister(instr->GetRn(), Reg31IsStackPointer)); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - VectorFormat vform = instr->GetSVEVectorFormat(); - switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) { - case INSR_z_v: - insr(vform, zd, ReadDRegisterBits(instr->GetRn())); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - VectorFormat vform = instr->GetSVEVectorFormat(); - switch (instr->Mask(SVEInsertGeneralRegisterMask)) { - case INSR_z_r: - insr(vform, zd, ReadXRegister(instr->GetRn())); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - switch (instr->Mask(SVEBroadcastIndexElementMask)) { - case DUP_z_zi: { - std::pair<int, int> index_and_lane_size = - instr->GetSVEPermuteIndexAndLaneSizeLog2(); - int index = index_and_lane_size.first; - int lane_size_in_bytes_log_2 = index_and_lane_size.second; - VectorFormat vform = - SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2); - if ((index < 0) || (index >= 
LaneCountFromFormat(vform))) { - // Out of bounds, set the destination register to zero. - dup_immediate(kFormatVnD, zd, 0); - } else { - dup_element(vform, zd, ReadVRegister(instr->GetRn()), index); - } - return; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - VectorFormat vform = instr->GetSVEVectorFormat(); - switch (instr->Mask(SVEReverseVectorElementsMask)) { - case REV_z_z: - rev(vform, zd, ReadVRegister(instr->GetRn())); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - VectorFormat vform = instr->GetSVEVectorFormat(); - switch (instr->Mask(SVEUnpackVectorElementsMask)) { - case SUNPKHI_z_z: - unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend); - break; - case SUNPKLO_z_z: - unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend); - break; - case UUNPKHI_z_z: - unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend); - break; - case UUNPKLO_z_z: - unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVETableLookup(const Instruction* instr) { - SimVRegister& zd = ReadVRegister(instr->GetRd()); - switch (instr->Mask(SVETableLookupMask)) { - case TBL_z_zz_1: - Table(instr->GetSVEVectorFormat(), - zd, - ReadVRegister(instr->GetRn()), - ReadVRegister(instr->GetRm())); - return; - default: - break; - } -} - -void Simulator::VisitSVEPredicateCount(const Instruction* instr) { - VectorFormat vform = instr->GetSVEVectorFormat(); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - - switch (instr->Mask(SVEPredicateCountMask)) { - case CNTP_r_p_p: { - 
WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn)); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPredicateLogical(const Instruction* instr) { - Instr op = instr->Mask(SVEPredicateLogicalMask); - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - SimPRegister& pm = ReadPRegister(instr->GetPm()); - SimPRegister result; - switch (op) { - case ANDS_p_p_pp_z: - case AND_p_p_pp_z: - case BICS_p_p_pp_z: - case BIC_p_p_pp_z: - case EORS_p_p_pp_z: - case EOR_p_p_pp_z: - case NANDS_p_p_pp_z: - case NAND_p_p_pp_z: - case NORS_p_p_pp_z: - case NOR_p_p_pp_z: - case ORNS_p_p_pp_z: - case ORN_p_p_pp_z: - case ORRS_p_p_pp_z: - case ORR_p_p_pp_z: - SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op), - result, - pn, - pm); - break; - case SEL_p_p_pp: - sel(pd, pg, pn, pm); - return; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - mov_zeroing(pd, pg, result); - if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) { - PredTest(kFormatVnB, pg, pd); - } -} - -void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) { - LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5)); - LogicPRegister pdn = ReadPRegister(instr->GetPd()); - switch (instr->Mask(SVEPredicateFirstActiveMask)) { - case PFIRST_p_p_p: - pfirst(pdn, pg, pdn); - // TODO: Is this broken when pg == pdn? - PredTest(kFormatVnB, pg, pdn); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) { - // This group only contains PTRUE{S}, and there are no unallocated encodings. 
- VIXL_STATIC_ASSERT( - SVEPredicateInitializeMask == - (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit)); - VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) || - (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s)); - - LogicPRegister pdn = ReadPRegister(instr->GetPd()); - VectorFormat vform = instr->GetSVEVectorFormat(); - - ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint()); - if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn); -} - -void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) { - // This group only contains PNEXT, and there are no unallocated encodings. - VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask); - VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p); - - LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5)); - LogicPRegister pdn = ReadPRegister(instr->GetPd()); - VectorFormat vform = instr->GetSVEVectorFormat(); - - pnext(vform, pdn, pg, pdn); - // TODO: Is this broken when pg == pdn? - PredTest(vform, pg, pdn); -} - -void Simulator::VisitSVEPredicateReadFromFFR_Predicated( - const Instruction* instr) { - LogicPRegister pd(ReadPRegister(instr->GetPd())); - LogicPRegister pg(ReadPRegister(instr->GetPn())); - FlagsUpdate flags = LeaveFlags; - switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) { - case RDFFR_p_p_f: - // Do nothing. 
- break; - case RDFFRS_p_p_f: - flags = SetFlags; - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - LogicPRegister ffr(ReadFFR()); - mov_zeroing(pd, pg, ffr); - - if (flags == SetFlags) { - PredTest(kFormatVnB, pg, pd); - } -} - -void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated( - const Instruction* instr) { - LogicPRegister pd(ReadPRegister(instr->GetPd())); - LogicPRegister ffr(ReadFFR()); - switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) { - case RDFFR_p_f: - mov(pd, ffr); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPredicateTest(const Instruction* instr) { - switch (instr->Mask(SVEPredicateTestMask)) { - case PTEST_p_p: - PredTest(kFormatVnB, - ReadPRegister(instr->ExtractBits(13, 10)), - ReadPRegister(instr->GetPn())); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPredicateZero(const Instruction* instr) { - switch (instr->Mask(SVEPredicateZeroMask)) { - case PFALSE_p: - pfalse(ReadPRegister(instr->GetPd())); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEPropagateBreak(const Instruction* instr) { - SimPRegister& pd = ReadPRegister(instr->GetPd()); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); - SimPRegister& pn = ReadPRegister(instr->GetPn()); - SimPRegister& pm = ReadPRegister(instr->GetPm()); - - bool set_flags = false; - switch (instr->Mask(SVEPropagateBreakMask)) { - case BRKPAS_p_p_pp: - set_flags = true; - VIXL_FALLTHROUGH(); - case BRKPA_p_p_pp: - brkpa(pd, pg, pn, pm); - break; - case BRKPBS_p_p_pp: - set_flags = true; - VIXL_FALLTHROUGH(); - case BRKPB_p_p_pp: - brkpb(pd, pg, pn, pm); - break; - default: - VIXL_UNIMPLEMENTED(); - break; - } - - if (set_flags) { - PredTest(kFormatVnB, pg, pd); - } -} - -void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) { - uint64_t length = 0; - switch (instr->Mask(SVEStackFrameAdjustmentMask)) { - case ADDPL_r_ri: - length 
= GetPredicateLengthInBytes(); - break; - case ADDVL_r_ri: - length = GetVectorLengthInBytes(); - break; - default: - VIXL_UNIMPLEMENTED(); - } - uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer); - WriteXRegister(instr->GetRd(), - base + (length * instr->GetImmSVEVLScale()), - LogRegWrites, - Reg31IsStackPointer); -} - -void Simulator::VisitSVEStackFrameSize(const Instruction* instr) { - int64_t scale = instr->GetImmSVEVLScale(); - - switch (instr->Mask(SVEStackFrameSizeMask)) { - case RDVL_r_i: - WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale); - break; - default: - VIXL_UNIMPLEMENTED(); - } -} - -void Simulator::VisitSVEVectorSelect(const Instruction* instr) { - // The only instruction in this group is `sel`, and there are no unused - // encodings. - VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz); - - VectorFormat vform = instr->GetSVEVectorFormat(); - SimVRegister& zd = ReadVRegister(instr->GetRd()); - SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); - SimVRegister& zn = ReadVRegister(instr->GetRn()); - SimVRegister& zm = ReadVRegister(instr->GetRm()); - - sel(vform, zd, pg, zn, zm); -} - -void Simulator::VisitSVEFFRInitialise(const Instruction* instr) { - switch (instr->Mask(SVEFFRInitialiseMask)) { - case SETFFR_f: { - LogicPRegister ffr(ReadFFR()); - ffr.SetAllBits(); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) { - switch (instr->Mask(SVEFFRWriteFromPredicateMask)) { - case WRFFR_f_p: { - SimPRegister pn(ReadPRegister(instr->GetPn())); - bool last_active = true; - for (unsigned i = 0; i < pn.GetSizeInBits(); i++) { - bool active = pn.GetBit(i); - if (active && !last_active) { - // `pn` is non-monotonic. This is UNPREDICTABLE. 
- VIXL_ABORT(); - } - last_active = active; - } - mov(ReadFFR(), pn); - break; - } - default: - VIXL_UNIMPLEMENTED(); - break; - } -} - -void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) { - bool is_signed; - switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) { - case LD1B_z_p_bi_u8: - case LD1B_z_p_bi_u16: - case LD1B_z_p_bi_u32: - case LD1B_z_p_bi_u64: - case LD1H_z_p_bi_u16: - case LD1H_z_p_bi_u32: - case LD1H_z_p_bi_u64: - case LD1W_z_p_bi_u32: - case LD1W_z_p_bi_u64: - case LD1D_z_p_bi_u64: - is_signed = false; - break; - case LD1SB_z_p_bi_s16: - case LD1SB_z_p_bi_s32: - case LD1SB_z_p_bi_s64: - case LD1SH_z_p_bi_s32: - case LD1SH_z_p_bi_s64: - case LD1SW_z_p_bi_s64: - is_signed = true; - break; - default: - // This encoding group is complete, so no other values should be possible. - VIXL_UNREACHABLE(); - is_signed = false; - break; - } - - int vl = GetVectorLengthInBytes(); - int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); - int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); - VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); - int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; - uint64_t offset = - (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredLoadHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - is_signed); -} - -void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar( - const Instruction* instr) { - bool is_signed; - switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { - case LD1B_z_p_br_u8: - case LD1B_z_p_br_u16: - case LD1B_z_p_br_u32: - case LD1B_z_p_br_u64: - case LD1H_z_p_br_u16: - case LD1H_z_p_br_u32: - case LD1H_z_p_br_u64: - case LD1W_z_p_br_u32: - case LD1W_z_p_br_u64: - case 
LD1D_z_p_br_u64: - is_signed = false; - break; - case LD1SB_z_p_br_s16: - case LD1SB_z_p_br_s32: - case LD1SB_z_p_br_s64: - case LD1SH_z_p_br_s32: - case LD1SH_z_p_br_s64: - case LD1SW_z_p_br_s64: - is_signed = true; - break; - default: - // This encoding group is complete, so no other values should be possible. - VIXL_UNREACHABLE(); - is_signed = false; - break; - } - - int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); - int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); - VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); - VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); - uint64_t offset = ReadXRegister(instr->GetRm()); - offset <<= msize_in_bytes_log2; - LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); - addr.SetMsizeInBytesLog2(msize_in_bytes_log2); - SVEStructuredLoadHelper(vform, - ReadPRegister(instr->GetPgLow8()), - instr->GetRt(), - addr, - is_signed); -} void Simulator::DoUnreachable(const Instruction* instr) { VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) && diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 1a89dff7..7cb7419a 100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -37,6 +37,7 @@ #include "cpu-features-auditor-aarch64.h" #include "disasm-aarch64.h" #include "instructions-aarch64.h" +#include "instrument-aarch64.h" #include "simulator-constants-aarch64.h" #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 @@ -76,22 +77,6 @@ class Memory { return value; } - template <typename A> - static uint64_t Read(int size_in_bytes, A address) { - switch (size_in_bytes) { - case 1: - return Read<uint8_t>(address); - case 2: - return Read<uint16_t>(address); - case 4: - return Read<uint32_t>(address); - case 8: - return Read<uint64_t>(address); - } - VIXL_UNREACHABLE(); - return 0; - } - template <typename T, typename A> static void Write(A address, T value) { address = AddressUntag(address); @@ -102,33 +87,19 @@ 
class Memory { } }; -// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15). -template <unsigned kMaxSizeInBits> +// Represent a register (r0-r31, v0-v31). +template <int kSizeInBytes> class SimRegisterBase { public: - static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte; - VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits); - - SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); } - - unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; } - unsigned GetSizeInBytes() const { return size_in_bytes_; } - - void SetSizeInBytes(unsigned size_in_bytes) { - VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes); - size_in_bytes_ = size_in_bytes; - } - void SetSizeInBits(unsigned size_in_bits) { - VIXL_ASSERT(size_in_bits <= kMaxSizeInBits); - VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); - SetSizeInBytes(size_in_bits / kBitsPerByte); - } + SimRegisterBase() : written_since_last_log_(false) {} // Write the specified value. The value is zero-extended if necessary. template <typename T> void Write(T new_value) { - // All AArch64 registers are zero-extending. - if (sizeof(new_value) < GetSizeInBytes()) Clear(); + if (sizeof(new_value) < kSizeInBytes) { + // All AArch64 registers are zero-extending. + memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value)); + } WriteLane(new_value, 0); NotifyRegisterWrite(); } @@ -137,11 +108,6 @@ class SimRegisterBase { Write(new_value); } - void Clear() { - memset(value_, 0, kMaxSizeInBytes); - NotifyRegisterWrite(); - } - // Insert a typed value into a register, leaving the rest of the register // unchanged. The lane parameter indicates where in the register the value // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where @@ -171,17 +137,6 @@ class SimRegisterBase { return GetLane(lane); } - // Get the value of a specific bit, indexed from the least-significant bit of - // lane 0. 
- bool GetBit(int bit) const { - int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte); - int byte = bit / (sizeof(value_[0]) * kBitsPerByte); - return ((value_[byte] >> bit_in_byte) & 1) != 0; - } - - // Return a pointer to the raw, underlying byte array. - const uint8_t* GetBytes() const { return value_; } - // TODO: Make this return a map of updated bytes, so that we can highlight // updated lanes for load-and-insert. (That never happens for scalar code, but // NEON has some instructions that can update individual lanes.) @@ -190,9 +145,7 @@ class SimRegisterBase { void NotifyRegisterLogged() { written_since_last_log_ = false; } protected: - uint8_t value_[kMaxSizeInBytes]; - - unsigned size_in_bytes_; + uint8_t value_[kSizeInBytes]; // Helpers to aid with register tracing. bool written_since_last_log_; @@ -203,152 +156,38 @@ class SimRegisterBase { template <typename T> void ReadLane(T* dst, int lane) const { VIXL_ASSERT(lane >= 0); - VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes()); + VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= kSizeInBytes); memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst)); } template <typename T> void WriteLane(T src, int lane) { VIXL_ASSERT(lane >= 0); - VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes()); + VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= kSizeInBytes); memcpy(&value_[lane * sizeof(src)], &src, sizeof(src)); } - - // The default ReadLane and WriteLane methods assume what we are copying is - // "trivially copyable" by using memcpy. We have to provide alternative - // implementations for SimFloat16 which cannot be copied this way. 
- - void ReadLane(vixl::internal::SimFloat16* dst, int lane) const { - uint16_t rawbits; - ReadLane(&rawbits, lane); - *dst = RawbitsToFloat16(rawbits); - } - - void WriteLane(vixl::internal::SimFloat16 src, int lane) { - WriteLane(Float16ToRawbits(src), lane); - } -}; - -typedef SimRegisterBase<kXRegSize> SimRegister; // r0-r31 -typedef SimRegisterBase<kPRegMaxSize> SimPRegister; // p0-p15 -// FFR has the same format as a predicate register. -typedef SimPRegister SimFFRRegister; - -// v0-v31 and z0-z31 -class SimVRegister : public SimRegisterBase<kZRegMaxSize> { - public: - SimVRegister() : SimRegisterBase<kZRegMaxSize>(), accessed_as_z_(false) {} - - void NotifyAccessAsZ() { accessed_as_z_ = true; } - - void NotifyRegisterLogged() { - SimRegisterBase<kZRegMaxSize>::NotifyRegisterLogged(); - accessed_as_z_ = false; - } - - bool AccessedAsZSinceLastLog() const { return accessed_as_z_; } - - private: - bool accessed_as_z_; -}; - -// Representation of a SVE predicate register. -class LogicPRegister { - public: - inline LogicPRegister( - SimPRegister& other) // NOLINT(runtime/references)(runtime/explicit) - : register_(other) {} - - // Set a conveniently-sized block to 16 bits as the minimum predicate length - // is 16 bits and allow to be increased to multiples of 16 bits. - typedef uint16_t ChunkType; - - // Assign a bit into the end positon of the specified lane. - // The bit is zero-extended if necessary. 
- void SetActive(VectorFormat vform, int lane_index, bool value) { - int psize = LaneSizeInBytesFromFormat(vform); - int bit_index = lane_index * psize; - int byte_index = bit_index / kBitsPerByte; - int bit_offset = bit_index % kBitsPerByte; - uint8_t byte = register_.GetLane<uint8_t>(byte_index); - register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value)); - } - - bool IsActive(VectorFormat vform, int lane_index) const { - int psize = LaneSizeInBytesFromFormat(vform); - int bit_index = lane_index * psize; - int byte_index = bit_index / kBitsPerByte; - int bit_offset = bit_index % kBitsPerByte; - uint8_t byte = register_.GetLane<uint8_t>(byte_index); - return ExtractBit(byte, bit_offset); - } - - // The accessors for bulk processing. - int GetChunkCount() const { - VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0); - return register_.GetSizeInBytes() / sizeof(ChunkType); - } - - ChunkType GetChunk(int lane) const { return GetActiveMask<ChunkType>(lane); } - - void SetChunk(int lane, ChunkType new_value) { - SetActiveMask(lane, new_value); - } - - void SetAllBits() { - int chunk_size = sizeof(ChunkType) * kBitsPerByte; - ChunkType bits = GetUintMask(chunk_size); - for (int lane = 0; - lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size)); - lane++) { - SetChunk(lane, bits); - } - } - - template <typename T> - T GetActiveMask(int lane) const { - return register_.GetLane<T>(lane); - } - - template <typename T> - void SetActiveMask(int lane, T new_value) { - register_.Insert<T>(lane, new_value); - } - - void Clear() { register_.Clear(); } - - bool Aliases(const LogicPRegister& other) const { - return ®ister_ == &other.register_; - } - - private: - // The bit assignment is zero-extended to fill the size of predicate element. - uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) { - VIXL_ASSERT(index >= 0); - VIXL_ASSERT(index + psize <= kBitsPerByte); - int bits = value ? 
1 : 0; - switch (psize) { - case 1: - AssignBit(byte, index, bits); - break; - case 2: - AssignBits(byte, index, 0x03, bits); - break; - case 4: - AssignBits(byte, index, 0x0f, bits); - break; - case 8: - AssignBits(byte, index, 0xff, bits); - break; - default: - VIXL_UNREACHABLE(); - return 0; - } - return byte; - } - - SimPRegister& register_; }; +typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31 +typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31 + +// The default ReadLane and WriteLane methods assume what we are copying is +// "trivially copyable" by using memcpy. We have to provide alternative +// implementations for SimFloat16 which cannot be copied this way. + +template <> +template <> +inline void SimVRegister::ReadLane(vixl::internal::SimFloat16* dst, + int lane) const { + uint16_t rawbits; + ReadLane(&rawbits, lane); + *dst = RawbitsToFloat16(rawbits); +} + +template <> +template <> +inline void SimVRegister::WriteLane(vixl::internal::SimFloat16 src, int lane) { + WriteLane(Float16ToRawbits(src), lane); +} // Representation of a vector register, with typed getters and setters for lanes // and additional information to represent lane state. 
@@ -366,7 +205,6 @@ class LogicVRegister { } int64_t Int(VectorFormat vform, int index) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); int64_t element; switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -389,7 +227,6 @@ class LogicVRegister { } uint64_t Uint(VectorFormat vform, int index) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); uint64_t element; switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -423,7 +260,6 @@ class LogicVRegister { } void SetInt(VectorFormat vform, int index, int64_t value) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, static_cast<int8_t>(value)); @@ -451,7 +287,6 @@ class LogicVRegister { } void SetUint(VectorFormat vform, int index, uint64_t value) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, static_cast<uint8_t>(value)); @@ -478,98 +313,7 @@ class LogicVRegister { } } - void ReadIntFromMem(VectorFormat vform, - unsigned msize_in_bits, - int index, - uint64_t addr) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); - int64_t value; - switch (msize_in_bits) { - case 8: - value = Memory::Read<int8_t>(addr); - break; - case 16: - value = Memory::Read<int16_t>(addr); - break; - case 32: - value = Memory::Read<int32_t>(addr); - break; - case 64: - value = Memory::Read<int64_t>(addr); - break; - default: - VIXL_UNREACHABLE(); - return; - } - - unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform); - VIXL_ASSERT(esize_in_bits >= msize_in_bits); - switch (esize_in_bits) { - case 8: - register_.Insert(index, static_cast<int8_t>(value)); - break; - case 16: - register_.Insert(index, static_cast<int16_t>(value)); - break; - case 32: - register_.Insert(index, static_cast<int32_t>(value)); - break; - case 64: - register_.Insert(index, static_cast<int64_t>(value)); - break; - default: - VIXL_UNREACHABLE(); - return; 
- } - } - - void ReadUintFromMem(VectorFormat vform, - unsigned msize_in_bits, - int index, - uint64_t addr) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); - uint64_t value; - switch (msize_in_bits) { - case 8: - value = Memory::Read<uint8_t>(addr); - break; - case 16: - value = Memory::Read<uint16_t>(addr); - break; - case 32: - value = Memory::Read<uint32_t>(addr); - break; - case 64: - value = Memory::Read<uint64_t>(addr); - break; - default: - VIXL_UNREACHABLE(); - return; - } - - unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform); - VIXL_ASSERT(esize_in_bits >= msize_in_bits); - switch (esize_in_bits) { - case 8: - register_.Insert(index, static_cast<uint8_t>(value)); - break; - case 16: - register_.Insert(index, static_cast<uint16_t>(value)); - break; - case 32: - register_.Insert(index, static_cast<uint32_t>(value)); - break; - case 64: - register_.Insert(index, static_cast<uint64_t>(value)); - break; - default: - VIXL_UNREACHABLE(); - return; - } - } - void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); @@ -590,7 +334,6 @@ class LogicVRegister { } void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); uint64_t value = Uint(vform, index); switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -618,20 +361,11 @@ class LogicVRegister { register_.Insert(index, value); } - template <typename T> - void SetFloat(VectorFormat vform, int index, T value) const { - if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); - register_.Insert(index, value); - } - - // When setting a result in a register larger than the result itself, the top - // bits of the register must be cleared. + // When setting a result in a register of size less than Q, the top bits of + // the Q register must be cleared. 
void ClearForWrite(VectorFormat vform) const { - // SVE destinations write whole registers, so we have nothing to clear. - if (IsSVEFormat(vform)) return; - unsigned size = RegisterSizeInBytesFromFormat(vform); - for (unsigned i = size; i < register_.GetSizeInBytes(); i++) { + for (unsigned i = size; i < kQRegSizeInBytes; i++) { SetUint(kFormat16B, i, 0); } } @@ -747,129 +481,15 @@ class LogicVRegister { return *this; } - int LaneCountFromFormat(VectorFormat vform) const { - if (IsSVEFormat(vform)) { - return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform); - } else { - return vixl::aarch64::LaneCountFromFormat(vform); - } - } - private: SimVRegister& register_; // Allocate one saturation state entry per lane; largest register is type Q, // and lanes can be a minimum of one byte wide. - Saturation saturated_[kZRegMaxSizeInBytes]; + Saturation saturated_[kQRegSizeInBytes]; // Allocate one rounding state entry per lane. - bool round_[kZRegMaxSizeInBytes]; -}; - -// Represent an SVE addressing mode and abstract per-lane address generation to -// make iteration easy. -// -// Contiguous accesses are described with a simple base address, the memory -// occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in -// each struct (`SetRegCount()`). -// -// Scatter-gather accesses also require a SimVRegister and information about how -// to extract lanes from it. -class LogicSVEAddressVector { - public: - // scalar-plus-scalar - // scalar-plus-immediate - explicit LogicSVEAddressVector(uint64_t base) - : base_(base), - msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), - reg_count_(1), - vector_(NULL), - vector_form_(kFormatUndefined), - vector_mod_(NO_SVE_OFFSET_MODIFIER), - vector_shift_(0) {} - - // scalar-plus-vector - // vector-plus-immediate - // `base` should be the constant used for each element. That is, the value - // of `xn`, or `#<imm>`. - // `vector` should be the SimVRegister with offsets for each element. 
The - // vector format must be specified; SVE scatter/gather accesses typically - // support both 32-bit and 64-bit addressing. - // - // `mod` and `shift` correspond to the modifiers applied to each element in - // scalar-plus-vector forms, such as those used for unpacking and - // sign-extension. They are not used for vector-plus-immediate. - LogicSVEAddressVector(uint64_t base, - const SimVRegister* vector, - VectorFormat vform, - SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER, - int shift = 0) - : base_(base), - msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), - reg_count_(1), - vector_(vector), - vector_form_(vform), - vector_mod_(mod), - vector_shift_(shift) {} - - // Set `msize` -- the memory occupied by each lane -- for address - // calculations. - void SetMsizeInBytesLog2(int msize_in_bytes_log2) { - VIXL_ASSERT(msize_in_bytes_log2 >= static_cast<int>(kBRegSizeInBytesLog2)); - VIXL_ASSERT(msize_in_bytes_log2 <= static_cast<int>(kDRegSizeInBytesLog2)); - msize_in_bytes_log2_ = msize_in_bytes_log2; - } - - bool HasMsize() const { - return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2; - } - - int GetMsizeInBytesLog2() const { - VIXL_ASSERT(HasMsize()); - return msize_in_bytes_log2_; - } - int GetMsizeInBitsLog2() const { - return GetMsizeInBytesLog2() + kBitsPerByteLog2; - } - - int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); } - int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); } - - void SetRegCount(int reg_count) { - VIXL_ASSERT(reg_count >= 1); // E.g. ld1/st1 - VIXL_ASSERT(reg_count <= 4); // E.g. ld4/st4 - reg_count_ = reg_count; - } - - int GetRegCount() const { return reg_count_; } - - // Full per-element address calculation for structured accesses. - // - // Note that the register number argument (`reg`) is zero-based. 
- uint64_t GetElementAddress(int lane, int reg) const { - VIXL_ASSERT(reg < GetRegCount()); - // Individual structures are always contiguous in memory, so this - // implementation works for both contiguous and scatter-gather addressing. - return GetStructAddress(lane) + (reg * GetMsizeInBytes()); - } - - // Full per-struct address calculation for structured accesses. - uint64_t GetStructAddress(int lane) const; - - bool IsContiguous() const { return vector_ == NULL; } - bool IsScatterGather() const { return !IsContiguous(); } - - private: - uint64_t base_; - int msize_in_bytes_log2_; - int reg_count_; - - const SimVRegister* vector_; - VectorFormat vector_form_; - SVEOffsetModifier vector_mod_; - int vector_shift_; - - static const int kUnknownMsizeInBytesLog2 = -1; + bool round_[kQRegSizeInBytes]; }; // The proper way to initialize a simulated system register (such as NZCV) is as @@ -1113,11 +733,6 @@ class Simulator : public DecoderVisitor { VIXL_ASSERT(IsWordAligned(pc_)); pc_modified_ = false; - if (movprfx_ != NULL) { - VIXL_CHECK(pc_->CanTakeSVEMovprfx(movprfx_)); - movprfx_ = NULL; - } - // On guarded pages, if BType is not zero, take an exception on any // instruction other than BTI, PACI[AB]SP, HLT or BRK. if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) { @@ -1159,6 +774,13 @@ class Simulator : public DecoderVisitor { #undef DECLARE +#define DECLARE(A) \ + VIXL_NO_RETURN_IN_DEBUG_MODE virtual void Visit##A(const Instruction* instr) \ + VIXL_OVERRIDE; + VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(DECLARE) +#undef DECLARE + + // Integer register accessors. // Basic accessor: Read the register as the specified type. @@ -1205,13 +827,6 @@ class Simulator : public DecoderVisitor { return ReadXRegister(code, r31mode); } - SimPRegister& ReadPRegister(unsigned code) { - VIXL_ASSERT(code < kNumberOfPRegisters); - return pregisters_[code]; - } - - SimFFRRegister& ReadFFR() { return ffr_register_; } - // As above, with parameterized size and return type. 
The value is // either zero-extended or truncated to fit, as required. template <typename T> @@ -1262,10 +877,6 @@ class Simulator : public DecoderVisitor { // Write 'value' into an integer register. The value is zero-extended. This // behaviour matches AArch64 register writes. - // - // SP may be specified in one of two ways: - // - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister) - // - (code == 31) && (r31mode == Reg31IsStackPointer) template <typename T> void WriteRegister(unsigned code, T value, @@ -1285,25 +896,20 @@ class Simulator : public DecoderVisitor { VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) || (sizeof(T) == kXRegSizeInBytes)); VIXL_ASSERT( - (code < kNumberOfRegisters) || + code < kNumberOfRegisters || ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode))); - if (code == 31) { - if (r31mode == Reg31IsZeroRegister) { - // Discard writes to the zero register. - return; - } else { - code = kSPRegInternalCode; - } + if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { + return; } - // registers_[31] is the stack pointer. - VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31); - registers_[code % kNumberOfRegisters].Write(value); - - if (log_mode == LogRegWrites) { - LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T))); + if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) { + code = 31; } + + registers_[code].Write(value); + + if (log_mode == LogRegWrites) LogRegister(code, r31mode); } template <typename T> VIXL_DEPRECATED("WriteRegister", @@ -1409,11 +1015,6 @@ class Simulator : public DecoderVisitor { uint8_t val[kQRegSizeInBytes]; }; - // A structure for representing a SVE Z register. - struct zreg_t { - uint8_t val[kZRegMaxSizeInBytes]; - }; - // Basic accessor: read the register as the specified type. 
template <typename T> T ReadVRegister(unsigned code) const { @@ -1529,8 +1130,7 @@ class Simulator : public DecoderVisitor { (sizeof(value) == kHRegSizeInBytes) || (sizeof(value) == kSRegSizeInBytes) || (sizeof(value) == kDRegSizeInBytes) || - (sizeof(value) == kQRegSizeInBytes) || - (sizeof(value) == kZRegMaxSizeInBytes)); + (sizeof(value) == kQRegSizeInBytes)); VIXL_ASSERT(code < kNumberOfVRegisters); vregisters_[code].Write(value); @@ -1637,12 +1237,6 @@ class Simulator : public DecoderVisitor { WriteQRegister(code, value, log_mode); } - void WriteZRegister(unsigned code, - zreg_t value, - RegLogMode log_mode = LogRegWrites) { - WriteVRegister(code, value, log_mode); - } - template <typename T> T ReadRegister(Register reg) const { return ReadRegister<T>(reg.GetCode(), Reg31IsZeroRegister); @@ -1763,16 +1357,14 @@ class Simulator : public DecoderVisitor { kPrintRegLaneSizeD = 3 << 0, kPrintRegLaneSizeX = kPrintRegLaneSizeD, kPrintRegLaneSizeQ = 4 << 0, - kPrintRegLaneSizeUnknown = 5 << 0, kPrintRegLaneSizeOffset = 0, kPrintRegLaneSizeMask = 7 << 0, - // The overall register size. + // The lane count. kPrintRegAsScalar = 0, kPrintRegAsDVector = 1 << 3, kPrintRegAsQVector = 2 << 3, - kPrintRegAsSVEVector = 3 << 3, kPrintRegAsVectorMask = 3 << 3, @@ -1780,98 +1372,37 @@ class Simulator : public DecoderVisitor { // S-, H-, and D-sized lanes.) kPrintRegAsFP = 1 << 5, - // With this flag, print helpers won't check that the upper bits are zero. - // This also forces the register name to be printed with the `reg<msb:0>` - // format. - // - // The flag is supported with any PrintRegisterFormat other than those with - // kPrintRegAsSVEVector. - kPrintRegPartial = 1 << 6, - -// Supported combinations. -// These exist so that they can be referred to by name, but also because C++ -// does not allow enum types to hold values that aren't explicitly -// enumerated, and we want to be able to combine the above flags. - -// Scalar formats. 
-#define VIXL_DECL_PRINT_REG_SCALAR(size) \ - kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \ - kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial -#define VIXL_DECL_PRINT_REG_SCALAR_FP(size) \ - VIXL_DECL_PRINT_REG_SCALAR(size) \ - , kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP, \ - kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP - VIXL_DECL_PRINT_REG_SCALAR(W), - VIXL_DECL_PRINT_REG_SCALAR(X), - VIXL_DECL_PRINT_REG_SCALAR_FP(H), - VIXL_DECL_PRINT_REG_SCALAR_FP(S), - VIXL_DECL_PRINT_REG_SCALAR_FP(D), - VIXL_DECL_PRINT_REG_SCALAR(Q), -#undef VIXL_DECL_PRINT_REG_SCALAR -#undef VIXL_DECL_PRINT_REG_SCALAR_FP - -#define VIXL_DECL_PRINT_REG_NEON(count, type, size) \ - kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \ - kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial -#define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size) \ - VIXL_DECL_PRINT_REG_NEON(count, type, size) \ - , kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \ - kPrintReg##count##type##PartialFP = \ - kPrintReg##count##type##Partial | kPrintRegAsFP - VIXL_DECL_PRINT_REG_NEON(1, B, Scalar), - VIXL_DECL_PRINT_REG_NEON(8, B, DVector), - VIXL_DECL_PRINT_REG_NEON(16, B, QVector), - VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar), - VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector), - VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector), - VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar), - VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector), - VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector), - VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar), - VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector), - VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar), -#undef VIXL_DECL_PRINT_REG_NEON -#undef VIXL_DECL_PRINT_REG_NEON_FP - -#define VIXL_DECL_PRINT_REG_SVE(type) \ - kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \ - kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial -#define VIXL_DECL_PRINT_REG_SVE_FP(type) \ - 
VIXL_DECL_PRINT_REG_SVE(type) \ - , kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \ - kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP - VIXL_DECL_PRINT_REG_SVE(B), - VIXL_DECL_PRINT_REG_SVE_FP(H), - VIXL_DECL_PRINT_REG_SVE_FP(S), - VIXL_DECL_PRINT_REG_SVE_FP(D), - VIXL_DECL_PRINT_REG_SVE(Q) -#undef VIXL_DECL_PRINT_REG_SVE -#undef VIXL_DECL_PRINT_REG_SVE_FP + // Supported combinations. + + kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar, + kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar, + kPrintHReg = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP, + kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP, + kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP, + + kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar, + kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector, + kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector, + kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar, + kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector, + kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector, + kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar, + kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector, + kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector, + kPrintReg1HFP = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP, + kPrintReg4HFP = kPrintRegLaneSizeH | kPrintRegAsDVector | kPrintRegAsFP, + kPrintReg8HFP = kPrintRegLaneSizeH | kPrintRegAsQVector | kPrintRegAsFP, + kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP, + kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP, + kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP, + kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar, + kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector, + kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP, + kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP, + kPrintReg1Q = 
kPrintRegLaneSizeQ | kPrintRegAsScalar }; - // Return `format` with the kPrintRegPartial flag set. - PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) { - // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so the - // result of this cast will always be well-defined. - return static_cast<PrintRegisterFormat>(format | kPrintRegPartial); - } - - // For SVE formats, return the format of a Q register part of it. - PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) { - VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); - // Keep the FP and lane size fields. - int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP); - // The resulting format must always be partial, because we're not formatting - // the whole Z register. - q_format |= (kPrintRegAsQVector | kPrintRegPartial); - - // This cast is always safe because NEON QVector formats support every - // combination of FP and lane size that SVE formats do. - return static_cast<PrintRegisterFormat>(q_format); - } - unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) { - VIXL_ASSERT((format & kPrintRegLaneSizeMask) != kPrintRegLaneSizeUnknown); return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset; } @@ -1880,51 +1411,17 @@ class Simulator : public DecoderVisitor { } unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) { - switch (format & kPrintRegAsVectorMask) { - case kPrintRegAsScalar: - return GetPrintRegLaneSizeInBytesLog2(format); - case kPrintRegAsDVector: - return kDRegSizeInBytesLog2; - case kPrintRegAsQVector: - return kQRegSizeInBytesLog2; - default: - case kPrintRegAsSVEVector: - // We print SVE vectors in Q-sized chunks. These need special handling, - // and it's probably an error to call this function in that case. 
- VIXL_UNREACHABLE(); - return kQRegSizeInBytesLog2; - } + if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2; + if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2; + + // Scalar types. + return GetPrintRegLaneSizeInBytesLog2(format); } unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) { return 1 << GetPrintRegSizeInBytesLog2(format); } - unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) { - return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2; - } - - unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) { - return 1 << GetPrintRegSizeInBitsLog2(format); - } - - const char* GetPartialRegSuffix(PrintRegisterFormat format) { - switch (GetPrintRegSizeInBitsLog2(format)) { - case kBRegSizeLog2: - return "<7:0>"; - case kHRegSizeLog2: - return "<15:0>"; - case kSRegSizeLog2: - return "<31:0>"; - case kDRegSizeLog2: - return "<63:0>"; - case kQRegSizeLog2: - return "<127:0>"; - } - VIXL_UNREACHABLE(); - return "<UNKNOWN>"; - } - unsigned GetPrintRegLaneCount(PrintRegisterFormat format) { unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format); unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format); @@ -1932,21 +1429,6 @@ class Simulator : public DecoderVisitor { return 1 << (reg_size_log2 - lane_size_log2); } - uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) { - int print_as = format & kPrintRegAsVectorMask; - if (print_as == kPrintRegAsScalar) return 1; - - // Vector formats, including SVE formats printed in Q-sized chunks. - static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001}; - unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format); - VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks)); - uint16_t mask = masks[size_in_bytes_log2]; - - // Exclude lanes that aren't visible in D vectors. 
- if (print_as == kPrintRegAsDVector) mask &= 0x00ff; - return mask; - } - PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size, unsigned lane_size); @@ -1977,10 +1459,6 @@ class Simulator : public DecoderVisitor { return format; } - PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) { - return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size)); - } - template <typename T> PrintRegisterFormat GetPrintRegisterFormat(T value) { return GetPrintRegisterFormatForSize(sizeof(value)); @@ -2007,314 +1485,99 @@ class Simulator : public DecoderVisitor { // Print all registers of the specified types. void PrintRegisters(); void PrintVRegisters(); - void PrintZRegisters(); void PrintSystemRegisters(); // As above, but only print the registers that have been updated. void PrintWrittenRegisters(); void PrintWrittenVRegisters(); - void PrintWrittenPRegisters(); // As above, but respect LOG_REG and LOG_VREG. void LogWrittenRegisters() { - if (ShouldTraceRegs()) PrintWrittenRegisters(); + if (GetTraceParameters() & LOG_REGS) PrintWrittenRegisters(); } void LogWrittenVRegisters() { - if (ShouldTraceVRegs()) PrintWrittenVRegisters(); - } - void LogWrittenPRegisters() { - if (ShouldTraceVRegs()) PrintWrittenPRegisters(); + if (GetTraceParameters() & LOG_VREGS) PrintWrittenVRegisters(); } void LogAllWrittenRegisters() { LogWrittenRegisters(); LogWrittenVRegisters(); - LogWrittenPRegisters(); - } - - // The amount of space to leave for a register name. This is used to keep the - // values vertically aligned. The longest register name has the form - // "z31<2047:1920>". The total overall value indentation must also take into - // account the fixed formatting: "# {name}: 0x{value}". - static const int kPrintRegisterNameFieldWidth = 14; - - // Print whole, individual register values. 
- // - The format can be used to restrict how much of the register is printed, - // but such formats indicate that the unprinted high-order bits are zero and - // these helpers will assert that. - // - If the format includes the kPrintRegAsFP flag then human-friendly FP - // value annotations will be printed. - // - The suffix can be used to add annotations (such as memory access - // details), or to suppress the newline. - void PrintRegister(int code, - PrintRegisterFormat format = kPrintXReg, - const char* suffix = "\n"); - void PrintVRegister(int code, - PrintRegisterFormat format = kPrintReg1Q, - const char* suffix = "\n"); - // PrintZRegister and PrintPRegister print over several lines, so they cannot - // allow the suffix to be overridden. - void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); - void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); - void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ); - // Print a single Q-sized part of a Z register, or the corresponding two-byte - // part of a P register. These print single lines, and therefore allow the - // suffix to be overridden. The format must include the kPrintRegPartial flag. - void PrintPartialZRegister(int code, - int q_index, - PrintRegisterFormat format = kPrintRegVnQ, - const char* suffix = "\n"); - void PrintPartialPRegister(int code, - int q_index, - PrintRegisterFormat format = kPrintRegVnQ, - const char* suffix = "\n"); - void PrintPartialPRegister(const char* name, - const SimPRegister& reg, - int q_index, - PrintRegisterFormat format = kPrintRegVnQ, - const char* suffix = "\n"); - - // Like Print*Register (above), but respect trace parameters. 
- void LogRegister(unsigned code, PrintRegisterFormat format) { - if (ShouldTraceRegs()) PrintRegister(code, format); - } - void LogVRegister(unsigned code, PrintRegisterFormat format) { - if (ShouldTraceVRegs()) PrintVRegister(code, format); - } - void LogZRegister(unsigned code, PrintRegisterFormat format) { - if (ShouldTraceVRegs()) PrintZRegister(code, format); - } - void LogPRegister(unsigned code, PrintRegisterFormat format) { - if (ShouldTraceVRegs()) PrintPRegister(code, format); - } - void LogFFR(PrintRegisterFormat format) { - if (ShouldTraceVRegs()) PrintFFR(format); } - // Other state updates, including system registers. + // Print individual register values (after update). + void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer); + void PrintVRegister(unsigned code, PrintRegisterFormat format); void PrintSystemRegister(SystemRegister id); void PrintTakenBranch(const Instruction* target); - void LogSystemRegister(SystemRegister id) { - if (ShouldTraceSysRegs()) PrintSystemRegister(id); - } - void LogTakenBranch(const Instruction* target) { - if (ShouldTraceBranches()) PrintTakenBranch(target); - } - - // Trace memory accesses. - // Common, contiguous register accesses (such as for scalars). - // The *Write variants automatically set kPrintRegPartial on the format. - void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address); - void PrintExtendingRead(int rt_code, - PrintRegisterFormat format, - int access_size_in_bytes, - uintptr_t address); - void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); - void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address); - void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); - // Simple, unpredicated SVE accesses always access the whole vector, and never - // know the lane type, so there's no need to accept a `format`. 
- void PrintZRead(int rt_code, uintptr_t address) { - vregisters_[rt_code].NotifyRegisterLogged(); - PrintZAccess(rt_code, "<-", address); - } - void PrintZWrite(int rt_code, uintptr_t address) { - PrintZAccess(rt_code, "->", address); + // Like Print* (above), but respect GetTraceParameters(). + void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) { + if (GetTraceParameters() & LOG_REGS) PrintRegister(code, r31mode); } - void PrintPRead(int rt_code, uintptr_t address) { - pregisters_[rt_code].NotifyRegisterLogged(); - PrintPAccess(rt_code, "<-", address); + void LogVRegister(unsigned code, PrintRegisterFormat format) { + if (GetTraceParameters() & LOG_VREGS) PrintVRegister(code, format); } - void PrintPWrite(int rt_code, uintptr_t address) { - PrintPAccess(rt_code, "->", address); + void LogSystemRegister(SystemRegister id) { + if (GetTraceParameters() & LOG_SYSREGS) PrintSystemRegister(id); } + void LogTakenBranch(const Instruction* target) { + if (GetTraceParameters() & LOG_BRANCH) PrintTakenBranch(target); + } + + // Print memory accesses. + void PrintRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format); + void PrintWrite(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format); + void PrintVRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format, + unsigned lane); + void PrintVWrite(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format, + unsigned lane); // Like Print* (above), but respect GetTraceParameters(). 
- void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { - if (ShouldTraceRegs()) PrintRead(rt_code, format, address); - } - void LogExtendingRead(int rt_code, - PrintRegisterFormat format, - int access_size_in_bytes, - uintptr_t address) { - if (ShouldTraceRegs()) { - PrintExtendingRead(rt_code, format, access_size_in_bytes, address); + void LogRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format) { + if (GetTraceParameters() & LOG_REGS) PrintRead(address, reg_code, format); + } + void LogWrite(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format) { + if (GetTraceParameters() & LOG_WRITE) PrintWrite(address, reg_code, format); + } + void LogVRead(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format, + unsigned lane = 0) { + if (GetTraceParameters() & LOG_VREGS) { + PrintVRead(address, reg_code, format, lane); } } - void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { - if (ShouldTraceWrites()) PrintWrite(rt_code, format, address); - } - void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { - if (ShouldTraceVRegs()) PrintVRead(rt_code, format, address); - } - void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { - if (ShouldTraceWrites()) PrintVWrite(rt_code, format, address); - } - void LogZRead(int rt_code, uintptr_t address) { - if (ShouldTraceVRegs()) PrintZRead(rt_code, address); - } - void LogZWrite(int rt_code, uintptr_t address) { - if (ShouldTraceWrites()) PrintZWrite(rt_code, address); - } - void LogPRead(int rt_code, uintptr_t address) { - if (ShouldTraceVRegs()) PrintPRead(rt_code, address); - } - void LogPWrite(int rt_code, uintptr_t address) { - if (ShouldTraceWrites()) PrintPWrite(rt_code, address); - } - - // Helpers for the above, where the access operation is parameterised. - // - For loads, set op = "<-". - // - For stores, set op = "->". 
- void PrintAccess(int rt_code, - PrintRegisterFormat format, - const char* op, - uintptr_t address); - void PrintVAccess(int rt_code, - PrintRegisterFormat format, - const char* op, - uintptr_t address); - // Simple, unpredicated SVE accesses always access the whole vector, and never - // know the lane type, so these don't accept a `format`. - void PrintZAccess(int rt_code, const char* op, uintptr_t address); - void PrintPAccess(int rt_code, const char* op, uintptr_t address); - - // Multiple-structure accesses. - void PrintVStructAccess(int rt_code, - int reg_count, - PrintRegisterFormat format, - const char* op, - uintptr_t address); - // Single-structure (single-lane) accesses. - void PrintVSingleStructAccess(int rt_code, - int reg_count, - int lane, - PrintRegisterFormat format, - const char* op, - uintptr_t address); - // Replicating accesses. - void PrintVReplicatingStructAccess(int rt_code, - int reg_count, - PrintRegisterFormat format, - const char* op, - uintptr_t address); - - // Multiple-structure accesses. - void PrintZStructAccess(int rt_code, - int reg_count, - const LogicPRegister& pg, - PrintRegisterFormat format, - int msize_in_bytes, - const char* op, - const LogicSVEAddressVector& addr); - - // Register-printing helper for all structured accessors. - // - // All lanes (according to `format`) are printed, but lanes indicated by - // `focus_mask` are of particular interest. Each bit corresponds to a byte in - // the printed register, in a manner similar to SVE's predicates. Currently, - // this is used to determine when to print human-readable FP annotations. - void PrintVRegistersForStructuredAccess(int rt_code, - int reg_count, - uint16_t focus_mask, - PrintRegisterFormat format); - - // As for the VRegister variant, but print partial Z register names. - void PrintZRegistersForStructuredAccess(int rt_code, - int q_index, - int reg_count, - uint16_t focus_mask, - PrintRegisterFormat format); - - // Print part of a memory access. 
This should be used for annotating - // non-trivial accesses, such as structured or sign-extending loads. Call - // Print*Register (or Print*RegistersForStructuredAccess), then - // PrintPartialAccess for each contiguous access that makes up the - // instruction. - // - // access_mask: - // The lanes to be printed. Each bit corresponds to a byte in the printed - // register, in a manner similar to SVE's predicates, except that the - // lane size is not respected when interpreting lane_mask: unaligned bits - // must be zeroed. - // - // This function asserts that this mask is non-zero. - // - // future_access_mask: - // The lanes to be printed by a future invocation. This must be specified - // because vertical lines are drawn for partial accesses that haven't yet - // been printed. The format is the same as for accessed_mask. - // - // If a lane is active in both `access_mask` and `future_access_mask`, - // `access_mask` takes precedence. - // - // struct_element_count: - // The number of elements in each structure. For non-structured accesses, - // set this to one. Along with lane_size_in_bytes, this is used determine - // the size of each access, and to format the accessed value. - // - // op: - // For stores, use "->". For loads, use "<-". - // - // address: - // The address of this partial access. (Not the base address of the whole - // instruction.) The traced value is read from this address (according to - // part_count and lane_size_in_bytes) so it must be accessible, and when - // tracing stores, the store must have been executed before this function - // is called. - // - // reg_size_in_bytes: - // The size of the register being accessed. This helper is usually used - // for V registers or Q-sized chunks of Z registers, so that is the - // default, but it is possible to use this to annotate X register - // accesses by specifying kXRegSizeInBytes. 
- // - // The return value is a future_access_mask suitable for the next iteration, - // so that it is possible to execute this in a loop, until the mask is zero. - // Note that accessed_mask must still be updated by the caller for each call. - uint16_t PrintPartialAccess(uint16_t access_mask, - uint16_t future_access_mask, - int struct_element_count, - int lane_size_in_bytes, - const char* op, - uintptr_t address, - int reg_size_in_bytes = kQRegSizeInBytes); - - // Print an abstract register value. This works for all register types, and - // can print parts of registers. This exists to ensure consistent formatting - // of values. - void PrintRegisterValue(const uint8_t* value, - int value_size, - PrintRegisterFormat format); - template <typename T> - void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) { - PrintRegisterValue(sim_register.GetBytes(), - std::min(sim_register.GetSizeInBytes(), - kQRegSizeInBytes), - format); + void LogVWrite(uintptr_t address, + unsigned reg_code, + PrintRegisterFormat format, + unsigned lane = 0) { + if (GetTraceParameters() & LOG_WRITE) { + PrintVWrite(address, reg_code, format, lane); + } } - // As above, but format as an SVE predicate value, using binary notation with - // spaces between each bit so that they align with the Z register bytes that - // they predicate. - void PrintPRegisterValue(uint16_t value); - - void PrintRegisterValueFPAnnotations(const uint8_t* value, - uint16_t lane_mask, - PrintRegisterFormat format); - template <typename T> - void PrintRegisterValueFPAnnotations(const T& sim_register, - uint16_t lane_mask, - PrintRegisterFormat format) { - PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format); - } - template <typename T> - void PrintRegisterValueFPAnnotations(const T& sim_register, - PrintRegisterFormat format) { - PrintRegisterValueFPAnnotations(sim_register.GetBytes(), - GetPrintRegLaneMask(format), - format); - } + // Helper functions for register tracing. 
+ void PrintRegisterRawHelper(unsigned code, + Reg31Mode r31mode, + int size_in_bytes = kXRegSizeInBytes); + void PrintVRegisterRawHelper(unsigned code, + int bytes = kQRegSizeInBytes, + int lsb = 0); + void PrintVRegisterFPHelper(unsigned code, + unsigned lane_size_in_bytes, + int lane_count = 1, + int rightmost_lane = 0); VIXL_NO_RETURN void DoUnreachable(const Instruction* instr); void DoTrace(const Instruction* instr); @@ -2324,13 +1587,10 @@ class Simulator : public DecoderVisitor { Reg31Mode mode = Reg31IsZeroRegister); static const char* XRegNameForCode(unsigned code, Reg31Mode mode = Reg31IsZeroRegister); - static const char* BRegNameForCode(unsigned code); static const char* HRegNameForCode(unsigned code); static const char* SRegNameForCode(unsigned code); static const char* DRegNameForCode(unsigned code); static const char* VRegNameForCode(unsigned code); - static const char* ZRegNameForCode(unsigned code); - static const char* PRegNameForCode(unsigned code); bool IsColouredTrace() const { return coloured_trace_; } VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) { @@ -2349,28 +1609,18 @@ class Simulator : public DecoderVisitor { return GetTraceParameters(); } - bool ShouldTraceWrites() const { - return (GetTraceParameters() & LOG_WRITE) != 0; - } - bool ShouldTraceRegs() const { - return (GetTraceParameters() & LOG_REGS) != 0; - } - bool ShouldTraceVRegs() const { - return (GetTraceParameters() & LOG_VREGS) != 0; - } - bool ShouldTraceSysRegs() const { - return (GetTraceParameters() & LOG_SYSREGS) != 0; - } - bool ShouldTraceBranches() const { - return (GetTraceParameters() & LOG_BRANCH) != 0; - } - void SetTraceParameters(int parameters); VIXL_DEPRECATED("SetTraceParameters", void set_trace_parameters(int parameters)) { SetTraceParameters(parameters); } + void SetInstructionStats(bool value); + VIXL_DEPRECATED("SetInstructionStats", + void set_instruction_stats(bool value)) { + SetInstructionStats(value); + } + // Clear the simulated 
local monitor to force the next store-exclusive // instruction to fail. void ClearLocalMonitor() { local_monitor_.Clear(); } @@ -2553,92 +1803,6 @@ class Simulator : public DecoderVisitor { }; #endif - // Configure the simulated value of 'VL', which is the size of a Z register. - // Because this cannot occur during a program's lifetime, this function also - // resets the SVE registers. - void SetVectorLengthInBits(unsigned vector_length); - - unsigned GetVectorLengthInBits() const { return vector_length_; } - unsigned GetVectorLengthInBytes() const { - VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0); - return vector_length_ / kBitsPerByte; - } - unsigned GetPredicateLengthInBits() const { - VIXL_ASSERT((GetVectorLengthInBits() % kZRegBitsPerPRegBit) == 0); - return GetVectorLengthInBits() / kZRegBitsPerPRegBit; - } - unsigned GetPredicateLengthInBytes() const { - VIXL_ASSERT((GetVectorLengthInBytes() % kZRegBitsPerPRegBit) == 0); - return GetVectorLengthInBytes() / kZRegBitsPerPRegBit; - } - - unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const { - if (IsSVEFormat(vform)) { - return GetVectorLengthInBits(); - } else { - return vixl::aarch64::RegisterSizeInBitsFromFormat(vform); - } - } - - unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const { - unsigned size_in_bits = RegisterSizeInBitsFromFormat(vform); - VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); - return size_in_bits / kBitsPerByte; - } - - int LaneCountFromFormat(VectorFormat vform) const { - if (IsSVEFormat(vform)) { - return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform); - } else { - return vixl::aarch64::LaneCountFromFormat(vform); - } - } - - bool IsFirstActive(VectorFormat vform, - const LogicPRegister& mask, - const LogicPRegister& bits) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (mask.IsActive(vform, i)) { - return bits.IsActive(vform, i); - } - } - return false; - } - - bool AreNoneActive(VectorFormat vform, - const LogicPRegister& mask, 
- const LogicPRegister& bits) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (mask.IsActive(vform, i) && bits.IsActive(vform, i)) { - return false; - } - } - return true; - } - - bool IsLastActive(VectorFormat vform, - const LogicPRegister& mask, - const LogicPRegister& bits) { - for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { - if (mask.IsActive(vform, i)) { - return bits.IsActive(vform, i); - } - } - return false; - } - - void PredTest(VectorFormat vform, - const LogicPRegister& mask, - const LogicPRegister& bits) { - ReadNzcv().SetN(IsFirstActive(vform, mask, bits)); - ReadNzcv().SetZ(AreNoneActive(vform, mask, bits)); - ReadNzcv().SetC(!IsLastActive(vform, mask, bits)); - ReadNzcv().SetV(0); - LogSystemRegister(NZCV); - } - - SimPRegister& GetPTrue() { return pregister_all_true_; } - protected: const char* clr_normal; const char* clr_flag_name; @@ -2647,8 +1811,6 @@ class Simulator : public DecoderVisitor { const char* clr_reg_value; const char* clr_vreg_name; const char* clr_vreg_value; - const char* clr_preg_name; - const char* clr_preg_value; const char* clr_memory_address; const char* clr_warning; const char* clr_warning_message; @@ -2656,13 +1818,6 @@ class Simulator : public DecoderVisitor { const char* clr_branch_marker; // Simulation helpers ------------------------------------ - - void ResetSystemRegisters(); - void ResetRegisters(); - void ResetVRegisters(); - void ResetPRegisters(); - void ResetFFR(); - bool ConditionPassed(Condition cond) { switch (cond) { case eq: @@ -2752,7 +1907,7 @@ class Simulator : public DecoderVisitor { } int64_t ShiftOperand(unsigned reg_size, - uint64_t value, + int64_t value, Shift shift_type, unsigned amount) const; int64_t ExtendValue(unsigned reg_width, @@ -2764,11 +1919,6 @@ class Simulator : public DecoderVisitor { void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr); void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr); void ld1r(VectorFormat vform, 
LogicVRegister dst, uint64_t addr); - void ld1r(VectorFormat vform, - VectorFormat unpack_vform, - LogicVRegister dst, - uint64_t addr, - bool is_signed = false); void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, @@ -2870,43 +2020,16 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - // Add `value` to each lane of `src1`, treating `value` as unsigned for the - // purposes of setting the saturation flags. - LogicVRegister add_uint(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - uint64_t value); LogicVRegister addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - LogicPRegister brka(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn); - LogicPRegister brkb(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn); - LogicPRegister brkn(LogicPRegister pdm, - const LogicPRegister& pg, - const LogicPRegister& pn); - LogicPRegister brkpa(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn, - const LogicPRegister& pm); - LogicPRegister brkpb(LogicPRegister pd, - const LogicPRegister& pg, - const LogicPRegister& pn, - const LogicPRegister& pm); - // dst = srca + src1 * src2 LogicVRegister mla(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); - // dst = srca - src1 * src2 LogicVRegister mls(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister mul(VectorFormat vform, @@ -2932,14 +2055,6 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - LogicVRegister sdiv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); - LogicVRegister udiv(VectorFormat vform, - 
LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform, LogicVRegister dst, @@ -2986,10 +2101,6 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); - LogicVRegister smulh(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); LogicVRegister smull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3050,10 +2161,6 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); - LogicVRegister umulh(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); LogicVRegister sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3118,12 +2225,6 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - // Subtract `value` from each lane of `src1`, treating `value` as unsigned for - // the purposes of setting the saturation flags. 
- LogicVRegister sub_uint(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - uint64_t value); LogicVRegister and_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3166,9 +2267,6 @@ class Simulator : public DecoderVisitor { LogicVRegister clz(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); - LogicVRegister cnot(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src); LogicVRegister cnt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -3180,11 +2278,8 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister rev(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src); - LogicVRegister rev_byte(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - int rev_size); + const LogicVRegister& src, + int revSize); LogicVRegister rev16(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -3232,7 +2327,6 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - const LogicVRegister& acc, int index, int rot); LogicVRegister fcmla(VectorFormat vform, @@ -3241,25 +2335,17 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2, int index, int rot); + template <typename T> LogicVRegister fcmla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - const LogicVRegister& acc, int rot); - template <typename T> - LogicVRegister fadda(VectorFormat vform, - LogicVRegister acc, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister fadda(VectorFormat vform, - LogicVRegister acc, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister index(VectorFormat vform, + LogicVRegister fcmla(VectorFormat vform, LogicVRegister dst, - uint64_t start, - uint64_t step); + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); LogicVRegister 
ins_element(VectorFormat vform, LogicVRegister dst, int dst_index, @@ -3269,36 +2355,13 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, int dst_index, uint64_t imm); - LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index); - LogicVRegister dup_elements_to_segments(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - int src_index); LogicVRegister dup_immediate(VectorFormat vform, LogicVRegister dst, uint64_t imm); - LogicVRegister mov(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src); - LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src); - LogicVRegister mov_merging(VectorFormat vform, - LogicVRegister dst, - const SimPRegister& pg, - const LogicVRegister& src); - LogicVRegister mov_zeroing(VectorFormat vform, - LogicVRegister dst, - const SimPRegister& pg, - const LogicVRegister& src); - LogicPRegister mov_merging(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src); - LogicPRegister mov_zeroing(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src); LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister orr(VectorFormat vform, @@ -3313,32 +2376,6 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - // Perform a "conditional last" operation. The first part of the pair is true - // if any predicate lane is active, false otherwise. The second part takes the - // value of the last active (plus offset) lane, or last (plus offset) lane if - // none active. 
- std::pair<bool, uint64_t> clast(VectorFormat vform, - const LogicPRegister& pg, - const LogicVRegister& src2, - int offset_from_last_active); - LogicVRegister compact(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister splice(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src1, - const LogicVRegister& src2); - LogicVRegister sel(VectorFormat vform, - LogicVRegister dst, - const SimPRegister& pg, - const LogicVRegister& src1, - const LogicVRegister& src2); - LogicPRegister sel(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src1, - const LogicPRegister& src2); LogicVRegister sminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3379,7 +2416,6 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister sminmaxv(VectorFormat vform, LogicVRegister dst, - const LogicPRegister& pg, const LogicVRegister& src, bool max); LogicVRegister smaxv(VectorFormat vform, @@ -3400,14 +2436,6 @@ class Simulator : public DecoderVisitor { LogicVRegister sxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); - LogicVRegister uxt(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - unsigned from_size_in_bits); - LogicVRegister sxt(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - unsigned from_size_in_bits); LogicVRegister tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, @@ -3432,10 +2460,6 @@ class Simulator : public DecoderVisitor { const LogicVRegister& ind); LogicVRegister Table(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src, - const LogicVRegister& tab); - LogicVRegister Table(VectorFormat vform, - LogicVRegister dst, const LogicVRegister& ind, bool zero_out_of_bounds, const LogicVRegister* tab1, @@ -3556,7 +2580,6 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2); 
LogicVRegister uminmaxv(VectorFormat vform, LogicVRegister dst, - const LogicPRegister& pg, const LogicVRegister& src, bool max); LogicVRegister umaxv(VectorFormat vform, @@ -3594,27 +2617,11 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src, int shift); LogicVRegister scvtf(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src, - FPRounding round, - int fbits = 0); - LogicVRegister scvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding rounding_mode); LogicVRegister ucvtf(VectorFormat vform, - unsigned dst_data_size, - unsigned src_data_size, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src, - FPRounding round, - int fbits = 0); - LogicVRegister ucvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, @@ -3699,9 +2706,9 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister extractnarrow(VectorFormat vform, LogicVRegister dst, - bool dst_is_signed, + bool dstIsSigned, const LogicVRegister& src, - bool src_is_signed); + bool srcIsSigned); LogicVRegister xtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -3718,7 +2725,7 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - bool is_signed); + bool issigned); LogicVRegister saba(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3944,23 +2951,19 @@ class Simulator : public DecoderVisitor { template <typename T> LogicVRegister fmla(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fmla(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); template <typename T> LogicVRegister 
fmls(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fmls(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fnmul(VectorFormat vform, @@ -4020,31 +3023,6 @@ class Simulator : public DecoderVisitor { LogicVRegister frecpx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); - LogicVRegister ftsmul(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); - LogicVRegister ftssel(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); - LogicVRegister ftmad(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - unsigned index); - LogicVRegister fexpa(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src); - template <typename T> - LogicVRegister fscale(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); - LogicVRegister fscale(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2); template <typename T> LogicVRegister fabs_(VectorFormat vform, LogicVRegister dst, @@ -4056,40 +3034,19 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + LogicVRegister frint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, bool inexact_exception = false, FrintMode frint_mode = kFrintToInteger); - LogicVRegister fcvt(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister fcvts(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, - LogicVRegister dst, - const 
LogicPRegister& pg, - const LogicVRegister& src, - FPRounding round, - int fbits = 0); LogicVRegister fcvts(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, int fbits = 0); LogicVRegister fcvtu(VectorFormat vform, - unsigned dst_data_size_in_bits, - unsigned src_data_size_in_bits, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src, - FPRounding round, - int fbits = 0); - LogicVRegister fcvtu(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, @@ -4129,78 +3086,16 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src); - LogicPRegister pfalse(LogicPRegister dst); - LogicPRegister pfirst(LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src); - LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int pattern); - LogicPRegister pnext(VectorFormat vform, - LogicPRegister dst, - const LogicPRegister& pg, - const LogicPRegister& src); - - LogicVRegister asrd(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - int shift); - - LogicVRegister andv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister eorv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister orv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister saddv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister sminv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister smaxv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister uaddv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - 
LogicVRegister uminv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - LogicVRegister umaxv(VectorFormat vform, - LogicVRegister dst, - const LogicPRegister& pg, - const LogicVRegister& src); - template <typename T> - struct TFPPairOp { + struct TFPMinMaxOp { typedef T (Simulator::*type)(T a, T b); }; template <typename T> - LogicVRegister FPPairedAcrossHelper(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPPairOp<T>::type fn, - uint64_t inactive_value); - - LogicVRegister FPPairedAcrossHelper( - VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPPairOp<vixl::internal::SimFloat16>::type fn16, - typename TFPPairOp<float>::type fn32, - typename TFPPairOp<double>::type fn64, - uint64_t inactive_value); + LogicVRegister fminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPMinMaxOp<T>::type Op); LogicVRegister fminv(VectorFormat vform, LogicVRegister dst, @@ -4214,9 +3109,6 @@ class Simulator : public DecoderVisitor { LogicVRegister fmaxnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); - LogicVRegister faddv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src); static const uint32_t CRC32_POLY = 0x04C11DB7; static const uint32_t CRC32C_POLY = 0x1EDC6F41; @@ -4317,129 +3209,6 @@ class Simulator : public DecoderVisitor { void DoSaveCPUFeatures(const Instruction* instr); void DoRestoreCPUFeatures(const Instruction* instr); - // General arithmetic helpers ---------------------------- - - // Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or - // sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must - // be a valid int<n>_t. 
- uint64_t IncDecN(uint64_t acc, - int64_t delta, - unsigned n, - bool is_saturating = false, - bool is_signed = false); - - // SVE helpers ------------------------------------------- - LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op, - VectorFormat vform, - LogicVRegister zd, - const LogicVRegister& zn, - const LogicVRegister& zm); - - LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, - LogicPRegister Pd, - const LogicPRegister& pn, - const LogicPRegister& pm); - - LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op, - VectorFormat vform, - LogicVRegister zd, - uint64_t imm); - enum UnpackType { kHiHalf, kLoHalf }; - enum ExtendType { kSignedExtend, kUnsignedExtend }; - LogicVRegister unpk(VectorFormat vform, - LogicVRegister zd, - const LogicVRegister& zn, - UnpackType unpack_type, - ExtendType extend_type); - - LogicPRegister SVEIntCompareVectorsHelper(Condition cc, - VectorFormat vform, - LogicPRegister dst, - const LogicPRegister& mask, - const LogicVRegister& src1, - const LogicVRegister& src2, - bool is_wide_elements = false, - FlagsUpdate flags = SetFlags); - - void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, - VectorFormat vform, - SVEOffsetModifier mod); - - // Store each active zt<i>[lane] to `addr.GetElementAddress(lane, ...)`. - // - // `zt_code` specifies the code of the first register (zt). Each additional - // register (up to `reg_count`) is `(zt_code + i) % 32`. - // - // This helper calls LogZWrite in the proper way, according to `addr`. - void SVEStructuredStoreHelper(VectorFormat vform, - const LogicPRegister& pg, - unsigned zt_code, - const LogicSVEAddressVector& addr); - // Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`. 
- void SVEStructuredLoadHelper(VectorFormat vform, - const LogicPRegister& pg, - unsigned zt_code, - const LogicSVEAddressVector& addr, - bool is_signed = false); - - enum SVEFaultTolerantLoadType { - // - Elements active in both FFR and pg are accessed as usual. If the access - // fails, the corresponding lane and all subsequent lanes are filled with - // an unpredictable value, and made inactive in FFR. - // - // - Elements active in FFR but not pg are set to zero. - // - // - Elements that are not active in FFR are filled with an unpredictable - // value, regardless of pg. - kSVENonFaultLoad, - - // If type == kSVEFirstFaultLoad, the behaviour is the same, except that the - // first active element is always accessed, regardless of FFR, and will - // generate a real fault if it is inaccessible. If the lane is not active in - // FFR, the actual value loaded into the result is still unpredictable. - kSVEFirstFaultLoad - }; - - // Load with first-faulting or non-faulting load semantics, respecting and - // updating FFR. - void SVEFaultTolerantLoadHelper(VectorFormat vform, - const LogicPRegister& pg, - unsigned zt_code, - const LogicSVEAddressVector& addr, - SVEFaultTolerantLoadType type, - bool is_signed); - - LogicVRegister SVEBitwiseShiftHelper(Shift shift_op, - VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - bool is_wide_elements); - - template <typename T> - LogicVRegister FTMaddHelper(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - uint64_t coeff_pos, - uint64_t coeff_neg); - - // Return the first or last active lane, or -1 if none are active. - int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const; - int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const; - - int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const; - - // Count active and true lanes in `pn`. 
- int CountActiveAndTrueLanes(VectorFormat vform, - const LogicPRegister& pg, - const LogicPRegister& pn) const; - - // Count the number of lanes referred to by `pattern`, given the vector - // length. If `pattern` is not a recognised SVEPredicateConstraint, this - // returns zero. - int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const; - // Simulate a runtime call. void DoRuntimeCall(const Instruction* instr); @@ -4453,21 +3222,15 @@ class Simulator : public DecoderVisitor { FILE* stream_; PrintDisassembler* print_disasm_; + // Instruction statistics instrumentation. + Instrument* instrumentation_; + // General purpose registers. Register 31 is the stack pointer. SimRegister registers_[kNumberOfRegisters]; // Vector registers SimVRegister vregisters_[kNumberOfVRegisters]; - // SVE predicate registers. - SimPRegister pregisters_[kNumberOfPRegisters]; - - // SVE first-fault register. - SimFFRRegister ffr_register_; - - // A pseudo SVE predicate register with all bits set to true. - SimPRegister pregister_all_true_; - // Program Status Register. // bits[31, 27]: Condition flags N, Z, C, and V. // (Negative, Zero, Carry, Overflow) @@ -4503,10 +3266,8 @@ class Simulator : public DecoderVisitor { // Stack byte* stack_; static const int stack_protection_size_ = 256; - // 8 KB stack. - // TODO: Make this configurable, or automatically allocate space as it runs - // out (like the OS would try to do). - static const int stack_size_ = 8 * 1024 + 2 * stack_protection_size_; + // 2 KB stack. + static const int stack_size_ = 2 * 1024 + 2 * stack_protection_size_; byte* stack_limit_; Decoder* decoder_; @@ -4515,10 +3276,6 @@ class Simulator : public DecoderVisitor { bool pc_modified_; const Instruction* pc_; - // If non-NULL, the last instruction was a movprfx, and validity needs to be - // checked. - Instruction const* movprfx_; - // Branch type register, used for branch target identification. 
BType btype_; @@ -4532,13 +3289,10 @@ class Simulator : public DecoderVisitor { static const char* xreg_names[]; static const char* wreg_names[]; - static const char* breg_names[]; static const char* hreg_names[]; static const char* sreg_names[]; static const char* dreg_names[]; static const char* vreg_names[]; - static const char* zreg_names[]; - static const char* preg_names[]; private: static const PACKey kPACKeyIA; @@ -4547,13 +3301,6 @@ class Simulator : public DecoderVisitor { static const PACKey kPACKeyDB; static const PACKey kPACKeyGA; - bool CanReadMemory(uintptr_t address, size_t size); - - // CanReadMemory needs dummy file descriptors, so we use a pipe. We can save - // some system call overhead by opening them on construction, rather than on - // every call to CanReadMemory. - int dummy_pipe_fd_[2]; - template <typename T> static T FPDefaultNaN(); @@ -4606,24 +3353,14 @@ class Simulator : public DecoderVisitor { } } - // Construct a SimVRegister from a SimPRegister, where each byte-sized lane of - // the destination is set to all true (0xff) when the corresponding - // predicate flag is set, and false (0x00) otherwise. - SimVRegister ExpandToSimVRegister(const SimPRegister& preg); - - // Set each predicate flag in pd where the corresponding assigned-sized lane - // in vreg is non-zero. Clear the flag, otherwise. This is almost the opposite - // operation to ExpandToSimVRegister(), except that any non-zero lane is - // interpreted as true. - void ExtractFromSimVRegister(VectorFormat vform, - SimPRegister& pd, // NOLINT(runtime/references) - SimVRegister vreg); - bool coloured_trace_; // A set of TraceParameters flags. int trace_parameters_; + // Indicates whether the instruction instrumentation is active. + bool instruction_stats_; + // Indicates whether the exclusive-access warning has been printed. 
bool print_exclusive_access_warning_; void PrintExclusiveAccessWarning(); @@ -4631,14 +3368,8 @@ class Simulator : public DecoderVisitor { CPUFeaturesAuditor cpu_features_auditor_; std::vector<CPUFeatures> saved_cpu_features_; - // State for *rand48 functions, used to simulate randomness with repeatable - // behaviour (so that tests are deterministic). This is used to simulate RNDR - // and RNDRRS, as well as to simulate a source of entropy for architecturally - // undefined behaviour. - uint16_t rand_state_[3]; - - // A configurable size of SVE vector registers. - unsigned vector_length_; + // The simulated state of RNDR and RNDRRS for generating a random number. + uint16_t rndr_state_[3]; }; #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L diff --git a/src/aarch64/simulator-constants-aarch64.h b/src/aarch64/simulator-constants-aarch64.h index e2389f11..3256f30e 100644 --- a/src/aarch64/simulator-constants-aarch64.h +++ b/src/aarch64/simulator-constants-aarch64.h @@ -121,7 +121,7 @@ const unsigned kTraceLength = 3 * kInstructionSize; enum TraceParameters { LOG_DISASM = 1 << 0, // Log disassembly. LOG_REGS = 1 << 1, // Log general purpose registers. - LOG_VREGS = 1 << 2, // Log SVE, NEON and floating-point registers. + LOG_VREGS = 1 << 2, // Log NEON and floating-point registers. LOG_SYSREGS = 1 << 3, // Log the flags and system registers. LOG_WRITE = 1 << 4, // Log writes to memory. LOG_BRANCH = 1 << 5, // Log taken branches. diff --git a/src/cpu-features.cc b/src/cpu-features.cc index 08db3f44..ea1e0d3e 100644 --- a/src/cpu-features.cc +++ b/src/cpu-features.cc @@ -37,9 +37,31 @@ namespace vixl { +static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) { + if (feature == CPUFeatures::kNone) { + return 0; + } else { + // Check that the shift is well-defined, and that the feature is valid. 
+ VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <= + (sizeof(uint64_t) * 8)); + VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures); + return UINT64_C(1) << feature; + } +} + +CPUFeatures::CPUFeatures(Feature feature0, + Feature feature1, + Feature feature2, + Feature feature3) + : features_(0) { + Combine(feature0, feature1, feature2, feature3); +} + CPUFeatures CPUFeatures::All() { CPUFeatures all; - all.features_.set(); + // Check that the shift is well-defined. + VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8)); + all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1; return all; } @@ -67,27 +89,74 @@ void CPUFeatures::Combine(const CPUFeatures& other) { features_ |= other.features_; } -void CPUFeatures::Combine(Feature feature) { - if (feature != CPUFeatures::kNone) features_.set(feature); +void CPUFeatures::Combine(Feature feature0, + Feature feature1, + Feature feature2, + Feature feature3) { + features_ |= MakeFeatureMask(feature0); + features_ |= MakeFeatureMask(feature1); + features_ |= MakeFeatureMask(feature2); + features_ |= MakeFeatureMask(feature3); } void CPUFeatures::Remove(const CPUFeatures& other) { features_ &= ~other.features_; } -void CPUFeatures::Remove(Feature feature) { - if (feature != CPUFeatures::kNone) features_.reset(feature); +void CPUFeatures::Remove(Feature feature0, + Feature feature1, + Feature feature2, + Feature feature3) { + features_ &= ~MakeFeatureMask(feature0); + features_ &= ~MakeFeatureMask(feature1); + features_ &= ~MakeFeatureMask(feature2); + features_ &= ~MakeFeatureMask(feature3); +} + +CPUFeatures CPUFeatures::With(const CPUFeatures& other) const { + CPUFeatures f(*this); + f.Combine(other); + return f; +} + +CPUFeatures CPUFeatures::With(Feature feature0, + Feature feature1, + Feature feature2, + Feature feature3) const { + CPUFeatures f(*this); + f.Combine(feature0, feature1, feature2, feature3); + return f; +} + +CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const { 
+ CPUFeatures f(*this); + f.Remove(other); + return f; +} + +CPUFeatures CPUFeatures::Without(Feature feature0, + Feature feature1, + Feature feature2, + Feature feature3) const { + CPUFeatures f(*this); + f.Remove(feature0, feature1, feature2, feature3); + return f; } bool CPUFeatures::Has(const CPUFeatures& other) const { return (features_ & other.features_) == other.features_; } -bool CPUFeatures::Has(Feature feature) const { - return (feature == CPUFeatures::kNone) || features_[feature]; +bool CPUFeatures::Has(Feature feature0, + Feature feature1, + Feature feature2, + Feature feature3) const { + uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) | + MakeFeatureMask(feature2) | MakeFeatureMask(feature3); + return (features_ & mask) == mask; } -size_t CPUFeatures::Count() const { return features_.count(); } +size_t CPUFeatures::Count() const { return CountSetBits(features_); } std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) { // clang-format off @@ -108,9 +177,12 @@ VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE) } CPUFeatures::const_iterator CPUFeatures::begin() const { - // For iterators in general, it's undefined to increment `end()`, but here we - // control the implementation and it is safe to do this. 
- return ++end(); + if (features_ == 0) return const_iterator(this, kNone); + + int feature_number = CountTrailingZeros(features_); + vixl::CPUFeatures::Feature feature = + static_cast<CPUFeatures::Feature>(feature_number); + return const_iterator(this, feature); } CPUFeatures::const_iterator CPUFeatures::end() const { @@ -118,11 +190,11 @@ CPUFeatures::const_iterator CPUFeatures::end() const { } std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) { - bool need_separator = false; - for (CPUFeatures::Feature feature : features) { - if (need_separator) os << ", "; - need_separator = true; - os << feature; + CPUFeatures::const_iterator it = features.begin(); + while (it != features.end()) { + os << *it; + ++it; + if (it != features.end()) os << ", "; } return os; } @@ -133,7 +205,7 @@ bool CPUFeaturesConstIterator::operator==( return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_); } -CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() { // Prefix +CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix VIXL_ASSERT(IsValid()); do { // Find the next feature. The order is unspecified. @@ -147,11 +219,11 @@ CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() { // Prefix // cpu_features_->Has(kNone) is always true, so this will terminate even if // the features list is empty. 
} while (!cpu_features_->Has(feature_)); - return *this; + return feature_; } -CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) { // Postfix - CPUFeaturesConstIterator result = *this; +CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) { // Postfix + CPUFeatures::Feature result = feature_; ++(*this); return result; } diff --git a/src/cpu-features.h b/src/cpu-features.h index 1b0f2c24..50ddc267 100644 --- a/src/cpu-features.h +++ b/src/cpu-features.h @@ -27,7 +27,6 @@ #ifndef VIXL_CPU_FEATURES_H #define VIXL_CPU_FEATURES_H -#include <bitset> #include <ostream> #include "globals-vixl.h" @@ -35,65 +34,16 @@ namespace vixl { -// VIXL aims to handle and detect all architectural features that are likely to -// influence code-generation decisions at EL0 (user-space). -// -// - There may be multiple VIXL feature flags for a given architectural -// extension. This occurs where the extension allow components to be -// implemented independently, or where kernel support is needed, and is likely -// to be fragmented. -// -// For example, Pointer Authentication (kPAuth*) has a separate feature flag -// for access to PACGA, and to indicate that the QARMA algorithm is -// implemented. -// -// - Conversely, some extensions have configuration options that do not affect -// EL0, so these are presented as a single VIXL feature. -// -// For example, the RAS extension (kRAS) has several variants, but the only -// feature relevant to VIXL is the addition of the ESB instruction so we only -// need a single flag. -// -// - VIXL offers separate flags for separate features even if they're -// architecturally linked. -// -// For example, the architecture requires kFPHalf and kNEONHalf to be equal, -// but they have separate hardware ID register fields so VIXL presents them as -// separate features. -// -// - VIXL can detect every feature for which it can generate code. -// -// - VIXL can detect some features for which it cannot generate code. 
-// -// The CPUFeatures::Feature enum — derived from the macro list below — is -// frequently extended. New features may be added to the list at any point, and -// no assumptions should be made about the numerical values assigned to each -// enum constant. The symbolic names can be considered to be stable. -// -// The debug descriptions are used only for debug output. The 'cpuinfo' strings -// are informative; VIXL does not use /proc/cpuinfo for feature detection. - // clang-format off #define VIXL_CPU_FEATURE_LIST(V) \ /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \ /* registers, so that the detailed feature registers can be read */ \ /* directly. */ \ - \ - /* Constant name Debug description Linux 'cpuinfo' string. */ \ V(kIDRegisterEmulation, "ID register emulation", "cpuid") \ \ V(kFP, "FP", "fp") \ V(kNEON, "NEON", "asimd") \ V(kCRC32, "CRC32", "crc32") \ - V(kDGH, "DGH", "dgh") \ - /* Speculation control features. */ \ - V(kCSV2, "CSV2", NULL) \ - V(kSCXTNUM, "SCXTNUM", NULL) \ - V(kCSV3, "CSV3", NULL) \ - V(kSB, "SB", "sb") \ - V(kSPECRES, "SPECRES", NULL) \ - V(kSSBS, "SSBS", NULL) \ - V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \ /* Cryptographic support instructions. */ \ V(kAES, "AES", "aes") \ V(kSHA1, "SHA1", "sha1") \ @@ -108,36 +58,28 @@ namespace vixl { V(kRDM, "RDM", "asimdrdm") \ /* Scalable Vector Extension. */ \ V(kSVE, "SVE", "sve") \ - V(kSVEF64MM, "SVE F64MM", "svef64mm") \ - V(kSVEF32MM, "SVE F32MM", "svef32mm") \ - V(kSVEI8MM, "SVE I8MM", "svei8imm") \ - V(kSVEBF16, "SVE BFloat16", "svebf16") \ /* SDOT and UDOT support (in NEON). */ \ V(kDotProduct, "DotProduct", "asimddp") \ - /* Int8 matrix multiplication (in NEON). */ \ - V(kI8MM, "NEON I8MM", "i8mm") \ /* Half-precision (FP16) support for FP and NEON, respectively. */ \ V(kFPHalf, "FPHalf", "fphp") \ V(kNEONHalf, "NEONHalf", "asimdhp") \ - /* BFloat16 support (in both FP and NEON.) 
*/ \ - V(kBF16, "FP/NEON BFloat 16", "bf16") \ /* The RAS extension, including the ESB instruction. */ \ V(kRAS, "RAS", NULL) \ /* Data cache clean to the point of persistence: DC CVAP. */ \ V(kDCPoP, "DCPoP", "dcpop") \ /* Data cache clean to the point of deep persistence: DC CVADP. */ \ - V(kDCCVADP, "DCCVADP", "dcpodp") \ + V(kDCCVADP, "DCCVADP", NULL) \ /* Cryptographic support instructions. */ \ V(kSHA3, "SHA3", "sha3") \ V(kSHA512, "SHA512", "sha512") \ V(kSM3, "SM3", "sm3") \ V(kSM4, "SM4", "sm4") \ /* Pointer authentication for addresses. */ \ - V(kPAuth, "PAuth", "paca") \ + V(kPAuth, "PAuth", NULL) \ /* Pointer authentication for addresses uses QARMA. */ \ V(kPAuthQARMA, "PAuthQARMA", NULL) \ /* Generic authentication (using the PACGA instruction). */ \ - V(kPAuthGeneric, "PAuthGeneric", "pacg") \ + V(kPAuthGeneric, "PAuthGeneric", NULL) \ /* Generic authentication uses QARMA. */ \ V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \ /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \ @@ -156,21 +98,13 @@ namespace vixl { /* Data-independent timing (for selected instructions). */ \ V(kDIT, "DIT", "dit") \ /* Branch target identification. */ \ - V(kBTI, "BTI", "bti") \ + V(kBTI, "BTI", NULL) \ /* Flag manipulation instructions: {AX,XA}FLAG */ \ - V(kAXFlag, "AXFlag", "flagm2") \ + V(kAXFlag, "AXFlag", NULL) \ /* Random number generation extension, */ \ - V(kRNG, "RNG", "rng") \ + V(kRNG, "RNG", NULL) \ /* Floating-point round to {32,64}-bit integer. */ \ - V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \ - /* Memory Tagging Extension. */ \ - V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \ - V(kMTE, "MTE", NULL) \ - /* PAuth extensions. 
*/ \ - V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \ - V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \ - V(kPAuthFPAC, "PAuth FPAC", NULL) \ - V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) + V(kFrintToFixedSizedInt,"Frint (bounded)", NULL) // clang-format on @@ -263,13 +197,13 @@ class CPUFeatures { // clang-format on // By default, construct with no features enabled. - CPUFeatures() : features_{} {} + CPUFeatures() : features_(0) {} // Construct with some features already enabled. - template <typename T, typename... U> - CPUFeatures(T first, U... others) : features_{} { - Combine(first, others...); - } + CPUFeatures(Feature feature0, + Feature feature1 = kNone, + Feature feature2 = kNone, + Feature feature3 = kNone); // Construct with all features enabled. This can be used to disable feature // checking: `Has(...)` returns true regardless of the argument. @@ -302,59 +236,41 @@ class CPUFeatures { // exist in this set are left unchanged. void Combine(const CPUFeatures& other); - // Combine a specific feature into this set. If it already exists in the set, - // the set is left unchanged. - void Combine(Feature feature); - - // Combine multiple features (or feature sets) into this set. - template <typename T, typename... U> - void Combine(T first, U... others) { - Combine(first); - Combine(others...); - } + // Combine specific features into this set. Features that already exist in + // this set are left unchanged. + void Combine(Feature feature0, + Feature feature1 = kNone, + Feature feature2 = kNone, + Feature feature3 = kNone); // Remove features in another CPUFeatures object from this one. void Remove(const CPUFeatures& other); - // Remove a specific feature from this set. This has no effect if the feature - // doesn't exist in the set. - void Remove(Feature feature0); - - // Remove multiple features (or feature sets) from this set. - template <typename T, typename... U> - void Remove(T first, U... 
others) { - Remove(first); - Remove(others...); - } - - // Chaining helpers for convenient construction by combining other CPUFeatures - // or individual Features. - template <typename... T> - CPUFeatures With(T... others) const { - CPUFeatures f(*this); - f.Combine(others...); - return f; - } - - template <typename... T> - CPUFeatures Without(T... others) const { - CPUFeatures f(*this); - f.Remove(others...); - return f; - } - - // Test whether the `other` feature set is equal to or a subset of this one. + // Remove specific features from this set. + void Remove(Feature feature0, + Feature feature1 = kNone, + Feature feature2 = kNone, + Feature feature3 = kNone); + + // Chaining helpers for convenient construction. + CPUFeatures With(const CPUFeatures& other) const; + CPUFeatures With(Feature feature0, + Feature feature1 = kNone, + Feature feature2 = kNone, + Feature feature3 = kNone) const; + CPUFeatures Without(const CPUFeatures& other) const; + CPUFeatures Without(Feature feature0, + Feature feature1 = kNone, + Feature feature2 = kNone, + Feature feature3 = kNone) const; + + // Query features. + // Note that an empty query (like `Has(kNone)`) always returns true. bool Has(const CPUFeatures& other) const; - - // Test whether a single feature exists in this set. - // Note that `Has(kNone)` always returns true. - bool Has(Feature feature) const; - - // Test whether all of the specified features exist in this set. - template <typename T, typename... U> - bool Has(T first, U... others) const { - return Has(first) && Has(others...); - } + bool Has(Feature feature0, + Feature feature1 = kNone, + Feature feature2 = kNone, + Feature feature3 = kNone) const; // Return the number of enabled features. size_t Count() const; @@ -372,8 +288,9 @@ class CPUFeatures { const_iterator end() const; private: - // Each bit represents a feature. This set will be extended as needed. - std::bitset<kNumberOfFeatures> features_; + // Each bit represents a feature. 
This field will be replaced as needed if + // features are added. + uint64_t features_; friend std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features); @@ -396,8 +313,8 @@ class CPUFeaturesConstIterator { bool operator!=(const CPUFeaturesConstIterator& other) const { return !(*this == other); } - CPUFeaturesConstIterator& operator++(); - CPUFeaturesConstIterator operator++(int); + CPUFeatures::Feature operator++(); + CPUFeatures::Feature operator++(int); CPUFeatures::Feature operator*() const { VIXL_ASSERT(IsValid()); @@ -442,17 +359,21 @@ class CPUFeaturesScope { // Start a CPUFeaturesScope on any object that implements // `CPUFeatures* GetCPUFeatures()`. template <typename T> - explicit CPUFeaturesScope(T* cpu_features_wrapper) + explicit CPUFeaturesScope(T* cpu_features_wrapper, + CPUFeatures::Feature feature0 = CPUFeatures::kNone, + CPUFeatures::Feature feature1 = CPUFeatures::kNone, + CPUFeatures::Feature feature2 = CPUFeatures::kNone, + CPUFeatures::Feature feature3 = CPUFeatures::kNone) : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), - old_features_(*cpu_features_) {} + old_features_(*cpu_features_) { + cpu_features_->Combine(feature0, feature1, feature2, feature3); + } - // Start a CPUFeaturesScope on any object that implements - // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled. - template <typename T, typename U, typename... V> - CPUFeaturesScope(T* cpu_features_wrapper, U first, V... 
features) + template <typename T> + CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other) : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), old_features_(*cpu_features_) { - cpu_features_->Combine(first, features...); + cpu_features_->Combine(other); } ~CPUFeaturesScope() { *cpu_features_ = old_features_; } diff --git a/src/globals-vixl.h b/src/globals-vixl.h index 4dc8c024..640b4b9b 100644 --- a/src/globals-vixl.h +++ b/src/globals-vixl.h @@ -27,10 +27,6 @@ #ifndef VIXL_GLOBALS_H #define VIXL_GLOBALS_H -#if __cplusplus < 201402L -#error VIXL requires C++14 -#endif - // Get standard C99 macros for integer types. #ifndef __STDC_CONSTANT_MACROS #define __STDC_CONSTANT_MACROS @@ -70,8 +66,7 @@ typedef uint8_t byte; const int KBytes = 1024; const int MBytes = 1024 * KBytes; -const int kBitsPerByteLog2 = 3; -const int kBitsPerByte = 1 << kBitsPerByteLog2; +const int kBitsPerByte = 8; template <int SizeInBits> struct Unsigned; @@ -228,11 +223,8 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {} #if __cplusplus >= 201103L #define VIXL_OVERRIDE override -#define VIXL_CONSTEXPR constexpr -#define VIXL_HAS_CONSTEXPR 1 #else #define VIXL_OVERRIDE -#define VIXL_CONSTEXPR #endif // With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw diff --git a/src/invalset-vixl.h b/src/invalset-vixl.h index 8bd6035e..fbfb6a01 100644 --- a/src/invalset-vixl.h +++ b/src/invalset-vixl.h @@ -842,7 +842,9 @@ InvalSetIterator<S>::InvalSetIterator(const InvalSetIterator<S>& other) #if __cplusplus >= 201103L template <class S> InvalSetIterator<S>::InvalSetIterator(InvalSetIterator<S>&& other) noexcept - : using_vector_(false), index_(0), inval_set_(NULL) { + : using_vector_(false), + index_(0), + inval_set_(NULL) { swap(*this, other); } #endif diff --git a/src/pool-manager-impl.h b/src/pool-manager-impl.h index a1bcaaad..66ecd6a4 100644 --- a/src/pool-manager-impl.h +++ b/src/pool-manager-impl.h @@ -264,14 +264,14 @@ bool PoolManager<T>::MustEmit(T 
pc, if (checkpoint < temp.min_location_) return true; } - bool temp_not_placed_yet = true; + bool tempNotPlacedYet = true; for (int i = static_cast<int>(objects_.size()) - 1; i >= 0; --i) { const PoolObject<T>& current = objects_[i]; - if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) { + if (tempNotPlacedYet && PoolObjectLessThan(current, temp)) { checkpoint = UpdateCheckpointForObject(checkpoint, &temp); if (checkpoint < temp.min_location_) return true; if (CheckFuturePC(pc, checkpoint)) return true; - temp_not_placed_yet = false; + tempNotPlacedYet = false; } if (current.label_base_ == label_base) continue; checkpoint = UpdateCheckpointForObject(checkpoint, &current); @@ -279,7 +279,7 @@ bool PoolManager<T>::MustEmit(T pc, if (CheckFuturePC(pc, checkpoint)) return true; } // temp is the object with the smallest max_location_. - if (temp_not_placed_yet) { + if (tempNotPlacedYet) { checkpoint = UpdateCheckpointForObject(checkpoint, &temp); if (checkpoint < temp.min_location_) return true; } @@ -497,7 +497,7 @@ PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { } #endif // Delete objects the pool manager owns. - for (typename std::vector<LocationBase<T>*>::iterator + for (typename std::vector<LocationBase<T> *>::iterator iter = delete_on_destruction_.begin(), end = delete_on_destruction_.end(); iter != end; diff --git a/src/utils-vixl.h b/src/utils-vixl.h index 0ae6dfc0..c9287e40 100644 --- a/src/utils-vixl.h +++ b/src/utils-vixl.h @@ -67,7 +67,7 @@ namespace vixl { #endif template <typename T, size_t n> -constexpr size_t ArrayLength(const T (&)[n]) { +size_t ArrayLength(const T (&)[n]) { return n; } @@ -77,30 +77,25 @@ inline uint64_t GetUintMask(unsigned bits) { return base - 1; } -inline uint64_t GetSignMask(unsigned bits) { - VIXL_ASSERT(bits <= 64); - return UINT64_C(1) << (bits - 1); -} - // Check number width. // TODO: Refactor these using templates. 
inline bool IsIntN(unsigned n, uint32_t x) { - VIXL_ASSERT((0 < n) && (n <= 32)); - return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n)); + VIXL_ASSERT((0 < n) && (n < 32)); + uint32_t limit = UINT32_C(1) << (n - 1); + return x < limit; } inline bool IsIntN(unsigned n, int32_t x) { - VIXL_ASSERT((0 < n) && (n <= 32)); - if (n == 32) return true; + VIXL_ASSERT((0 < n) && (n < 32)); int32_t limit = INT32_C(1) << (n - 1); return (-limit <= x) && (x < limit); } inline bool IsIntN(unsigned n, uint64_t x) { - VIXL_ASSERT((0 < n) && (n <= 64)); - return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n)); + VIXL_ASSERT((0 < n) && (n < 64)); + uint64_t limit = UINT64_C(1) << (n - 1); + return x < limit; } inline bool IsIntN(unsigned n, int64_t x) { - VIXL_ASSERT((0 < n) && (n <= 64)); - if (n == 64) return true; + VIXL_ASSERT((0 < n) && (n < 64)); int64_t limit = INT64_C(1) << (n - 1); return (-limit <= x) && (x < limit); } @@ -109,8 +104,7 @@ VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) { } inline bool IsUintN(unsigned n, uint32_t x) { - VIXL_ASSERT((0 < n) && (n <= 32)); - if (n >= 32) return true; + VIXL_ASSERT((0 < n) && (n < 32)); return !(x >> n); } inline bool IsUintN(unsigned n, int32_t x) { @@ -119,8 +113,7 @@ inline bool IsUintN(unsigned n, int32_t x) { return !(static_cast<uint32_t>(x) >> n); } inline bool IsUintN(unsigned n, uint64_t x) { - VIXL_ASSERT((0 < n) && (n <= 64)); - if (n >= 64) return true; + VIXL_ASSERT((0 < n) && (n < 64)); return !(x >> n); } inline bool IsUintN(unsigned n, int64_t x) { @@ -196,7 +189,7 @@ inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) { } -inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) { +inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) { VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && (msb >= lsb)); return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x)); @@ -216,7 +209,8 @@ inline int64_t 
ExtractSignedBitfield64(int msb, int lsb, uint64_t x) { return result; } -inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) { + +inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint32_t x) { VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && (msb >= lsb)); uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x)); @@ -225,6 +219,7 @@ inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) { return result; } + inline uint64_t RotateRight(uint64_t value, unsigned int rotate, unsigned int width) { @@ -282,19 +277,6 @@ VIXL_DEPRECATED("RawbitsToDouble", return RawbitsToDouble(bits); } -// Convert unsigned to signed numbers in a well-defined way (using two's -// complement representations). -inline int64_t RawbitsToInt64(uint64_t bits) { - return (bits >= UINT64_C(0x8000000000000000)) - ? (-static_cast<int64_t>(-bits - 1) - 1) - : static_cast<int64_t>(bits); -} - -inline int32_t RawbitsToInt32(uint32_t bits) { - return (bits >= UINT64_C(0x80000000)) ? 
(-static_cast<int32_t>(-bits - 1) - 1) - : static_cast<int32_t>(bits); -} - namespace internal { // Internal simulation class used solely by the simulator to @@ -389,10 +371,6 @@ VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) { bool IsZero(Float16 value); -inline bool IsPositiveZero(double value) { - return (value == 0.0) && (copysign(1.0, value) > 0.0); -} - inline bool IsNaN(float value) { return std::isnan(value); } inline bool IsNaN(double value) { return std::isnan(value); } @@ -512,11 +490,11 @@ T ReverseBits(T value) { template <typename T> -inline T SignExtend(T val, int size_in_bits) { - VIXL_ASSERT(size_in_bits > 0); - T mask = (T(2) << (size_in_bits - 1)) - T(1); +inline T SignExtend(T val, int bitSize) { + VIXL_ASSERT(bitSize > 0); + T mask = (T(2) << (bitSize - 1)) - T(1); val &= mask; - T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits); + T sign_bits = -((val >> (bitSize - 1)) << bitSize); val |= sign_bits; return val; } @@ -598,7 +576,7 @@ T AlignUp(T pointer, // reinterpret_cast behaviour for other types. typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw = - (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer; + (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer; VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); size_t mask = alignment - 1; @@ -618,7 +596,7 @@ T AlignDown(T pointer, // reinterpret_cast behaviour for other types. typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw = - (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer; + (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer; VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); size_t mask = alignment - 1; @@ -1002,42 +980,6 @@ Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {} Int64 BitCount(Uint32 value); -// The algorithm used is adapted from the one described in section 8.2 of -// Hacker's Delight, by Henry S. Warren, Jr. 
-template <unsigned N, typename T> -int64_t MultiplyHigh(T u, T v) { - uint64_t u0, v0, w0, u1, v1, w1, w2, t; - VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64)); - uint64_t sign_mask = UINT64_C(1) << (N - 1); - uint64_t sign_ext = 0; - unsigned half_bits = N / 2; - uint64_t half_mask = GetUintMask(half_bits); - if (std::numeric_limits<T>::is_signed) { - sign_ext = UINT64_C(0xffffffffffffffff) << half_bits; - } - - VIXL_ASSERT(sizeof(u) == sizeof(uint64_t)); - VIXL_ASSERT(sizeof(u) == sizeof(u0)); - - u0 = u & half_mask; - u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0); - v0 = v & half_mask; - v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0); - - w0 = u0 * v0; - t = u1 * v0 + (w0 >> half_bits); - - w1 = t & half_mask; - w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0); - w1 = u0 * v1 + w1; - w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0); - - uint64_t value = u1 * v1 + w2 + w1; - int64_t result; - memcpy(&result, &value, sizeof(result)); - return result; -} - } // namespace internal // The default NaN values (for FPCR.DN=1). @@ -1302,8 +1244,9 @@ inline Float16 FPRoundToFloat16(int64_t sign, uint64_t mantissa, FPRounding round_mode) { return RawbitsToFloat16( - FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>( - sign, exponent, mantissa, round_mode)); + FPRound<uint16_t, + kFloat16ExponentBits, + kFloat16MantissaBits>(sign, exponent, mantissa, round_mode)); } @@ -1339,62 +1282,6 @@ Float16 FPToFloat16(double value, FPRounding round_mode, UseDefaultNaN DN, bool* exception = NULL); - -// Like static_cast<T>(value), but with specialisations for the Float16 type. 
-template <typename T, typename F> -T StaticCastFPTo(F value) { - return static_cast<T>(value); -} - -template <> -inline float StaticCastFPTo<float, Float16>(Float16 value) { - return FPToFloat(value, kIgnoreDefaultNaN); -} - -template <> -inline double StaticCastFPTo<double, Float16>(Float16 value) { - return FPToDouble(value, kIgnoreDefaultNaN); -} - -template <> -inline Float16 StaticCastFPTo<Float16, float>(float value) { - return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN); -} - -template <> -inline Float16 StaticCastFPTo<Float16, double>(double value) { - return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN); -} - -template <typename T> -uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) { - switch (size_in_bits) { - case 16: - return Float16ToRawbits(StaticCastFPTo<Float16>(value)); - case 32: - return FloatToRawbits(StaticCastFPTo<float>(value)); - case 64: - return DoubleToRawbits(StaticCastFPTo<double>(value)); - } - VIXL_UNREACHABLE(); - return 0; -} - -template <typename T> -T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) { - VIXL_ASSERT(IsUintN(size_in_bits, value)); - switch (size_in_bits) { - case 16: - return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value))); - case 32: - return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value))); - case 64: - return StaticCastFPTo<T>(RawbitsToDouble(value)); - } - VIXL_UNREACHABLE(); - return 0; -} - } // namespace vixl #endif // VIXL_UTILS_H |