author | Artem Serov <artem.serov@linaro.org> | 2020-11-09 15:26:22 +0000
---|---|---
committer | Artem Serov <artem.serov@linaro.org> | 2020-11-10 15:33:15 +0000
commit | 5a229a9923d9dab968d7fe186ffa47ac52f9e065 (patch) |
tree | ca689c0233a213244a288044dbb6cfc86d99be60 /src |
parent | aa1d76b1824ec9bcf66af54fbdc9d137a3c398d5 (diff) |
download | vixl-5a229a9923d9dab968d7fe186ffa47ac52f9e065.tar.gz |
Revert^2 "Merge remote-tracking branch 'aosp/upstream-master' into..."
This reverts commit 0a75ba66aa15ea1cdb3f57d0efd4ce7e7c14d45f.
Test: mma test-art-host-vixl
Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Change-Id: I052ab4d3243b0b9bee4c52d00ba4ef1d93a8d32c
Diffstat (limited to 'src')
36 files changed, 35236 insertions, 4027 deletions
diff --git a/src/aarch32/disasm-aarch32.cc b/src/aarch32/disasm-aarch32.cc
index 9ed3a831..535f60c8 100644
--- a/src/aarch32/disasm-aarch32.cc
+++ b/src/aarch32/disasm-aarch32.cc
@@ -8288,13 +8288,13 @@ void Disassembler::DecodeT32(uint32_t instr) {
           UnallocatedT32(instr);
           return;
         }
-        unsigned firstcond = (instr >> 20) & 0xf;
+        unsigned first_cond = (instr >> 20) & 0xf;
         unsigned mask = (instr >> 16) & 0xf;
-        bool wasInITBlock = InITBlock();
-        SetIT(Condition(firstcond), mask);
-        it(Condition(firstcond), mask);
-        if (wasInITBlock || (firstcond == 15) ||
-            ((firstcond == al) &&
+        bool was_in_it_block = InITBlock();
+        SetIT(Condition(first_cond), mask);
+        it(Condition(first_cond), mask);
+        if (was_in_it_block || (first_cond == 15) ||
+            ((first_cond == al) &&
              (BitCount(Uint32(mask)) != 1))) {
           UnpredictableT32(instr);
         }
diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h
index d0ff52b3..6d76642f 100644
--- a/src/aarch32/macro-assembler-aarch32.h
+++ b/src/aarch32/macro-assembler-aarch32.h
@@ -268,7 +268,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #else
     USE(allow_macro_instructions_);
 #endif
@@ -283,7 +284,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
   MacroAssembler(byte* buffer, size_t size, InstructionSet isa = kDefaultISA)
@@ -296,7 +298,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
         generate_simulator_code_(VIXL_AARCH32_GENERATE_SIMULATOR_CODE),
         pool_end_(NULL) {
 #ifdef VIXL_DEBUG
-    SetAllowMacroInstructions(true);
+    SetAllowMacroInstructions(  // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
+        true);
 #endif
   }
diff --git a/src/aarch32/operands-aarch32.h b/src/aarch32/operands-aarch32.h
index 1d18bfd3..2b452958 100644
--- a/src/aarch32/operands-aarch32.h
+++ b/src/aarch32/operands-aarch32.h
@@ -54,28 +54,16 @@ class Operand {
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}
   Operand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoReg),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {}
+      : imm_(immediate), rm_(NoReg), shift_(LSL), amount_(0), rs_(NoReg) {}

   // rm
   // where rm is the base register
   // This is allowed to be an implicit constructor because Operand is
   // a wrapper class that doesn't normally perform any type conversion.
   Operand(Register rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm),
-        shift_(LSL),
-        amount_(0),
-        rs_(NoReg) {
+      : imm_(0), rm_(rm), shift_(LSL), amount_(0), rs_(NoReg) {
     VIXL_ASSERT(rm_.IsValid());
   }
@@ -245,22 +233,18 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}
   NeonImmediate(int immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I32) {}
+      : imm_(immediate), immediate_type_(I32) {}

   // { #<immediate> }
   // where <immediate> is a 64 bit number
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}
   NeonImmediate(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(I64) {}
+      : imm_(immediate), immediate_type_(I64) {}

   // { #<immediate> }
   // where <immediate> is a non zero floating point number which can be encoded
@@ -268,11 +252,9 @@ class NeonImmediate {
   // This is allowed to be an implicit constructor because NeonImmediate is
   // a wrapper class that doesn't normally perform any type conversion.
   NeonImmediate(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F32) {}
+      : imm_(immediate), immediate_type_(F32) {}
   NeonImmediate(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        immediate_type_(F64) {}
+      : imm_(immediate), immediate_type_(F64) {}

   NeonImmediate(const NeonImmediate& src)
       : imm_(src.imm_), immediate_type_(src.immediate_type_) {}
@@ -374,29 +356,21 @@ std::ostream& operator<<(std::ostream& os, const NeonImmediate& operand);
 class NeonOperand {
  public:
   NeonOperand(int32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint32_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(int64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(uint64_t immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(float immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(double immediate)  // NOLINT(runtime/explicit)
-      : imm_(immediate),
-        rm_(NoDReg) {}
+      : imm_(immediate), rm_(NoDReg) {}
   NeonOperand(const NeonImmediate& imm)  // NOLINT(runtime/explicit)
-      : imm_(imm),
-        rm_(NoDReg) {}
+      : imm_(imm), rm_(NoDReg) {}
   NeonOperand(const VRegister& rm)  // NOLINT(runtime/explicit)
-      : imm_(0),
-        rm_(rm) {
+      : imm_(0), rm_(rm) {
     VIXL_ASSERT(rm_.IsValid());
   }
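The NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) annotations added to macro-assembler-aarch32.h above suppress a Clang Static Analyzer check that fires when a constructor calls a virtual function. A minimal, self-contained illustration of the behaviour that check guards against; the class names here are invented for the sketch and are not VIXL's:

#include <iostream>

class Base {
 public:
  Base() {
    // During Base's constructor the dynamic type is still Base, so this
    // call resolves to Base::Configure() and never to an override. This is
    // exactly what clang-analyzer-optin.cplusplus.VirtualCall flags.
    Configure();
  }
  virtual ~Base() = default;
  virtual void Configure() { std::cout << "Base::Configure\n"; }
};

class Derived : public Base {
 public:
  void Configure() override { std::cout << "Derived::Configure\n"; }
};

int main() {
  Derived d;  // Prints "Base::Configure", not "Derived::Configure".
  return 0;
}

In the MacroAssembler constructors the static resolution appears to be intentional, which is presumably why the commit suppresses the warning rather than restructuring the constructors.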
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 9e73ffaa..e98de89b 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -1044,7 +1044,7 @@ void Assembler::cls(const Register& rd, const Register& rn) {
   V(auti, AUTI) \
   V(autd, AUTD)

-#define DEFINE_ASM_FUNCS(PRE, OP) \
+#define VIXL_DEFINE_ASM_FUNC(PRE, OP) \
   void Assembler::PRE##a(const Register& xd, const Register& xn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
     VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \
@@ -1069,8 +1069,8 @@ void Assembler::cls(const Register& rd, const Register& rn) {
     Emit(SF(xd) | OP##ZB | Rd(xd)); \
   }

-PAUTH_VARIATIONS(DEFINE_ASM_FUNCS)
-#undef DEFINE_ASM_FUNCS
+PAUTH_VARIATIONS(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::pacga(const Register& xd,
                       const Register& xn,
@@ -1141,7 +1141,13 @@ void Assembler::LoadStorePair(const CPURegister& rt,
       addrmodeop = LoadStorePairPostIndexFixed;
     }
   }
-  Emit(addrmodeop | memop);
+
+  Instr emitop = addrmodeop | memop;
+
+  // Only X registers may be specified for ldpsw.
+  VIXL_ASSERT(((emitop & LoadStorePairMask) != LDPSW_x) || rt.IsX());
+
+  Emit(emitop);
 }
@@ -1381,10 +1387,16 @@ void Assembler::ldr(const CPURegister& rt, int64_t imm19) {
 }

-void Assembler::prfm(PrefetchOperation op, int64_t imm19) {
+void Assembler::prfm(int op, int64_t imm19) {
   Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19));
 }

+void Assembler::prfm(PrefetchOperation op, int64_t imm19) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), imm19);
+}
+
 // Exclusive-access instructions.
 void Assembler::stxrb(const Register& rs,
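The prfm pair above is a pattern this commit applies to every prefetch entry point: a typed overload asserts that the PrefetchOperation is a named enumerator and forwards to an int overload that performs the encoding, so unallocated hint values never travel through the enum type. A rough standalone sketch of the idea; the enumerator values follow the AArch64 prfm hint encodings, but the function bodies are illustrative rather than VIXL's:

#include <cassert>

// Illustrative stand-ins for VIXL's PrefetchOperation and its helper.
enum PrefetchOperation { PLDL1KEEP = 0, PLDL1STRM = 1, PSTL3STRM = 21 };

bool IsNamedPrefetchOperation(int op) {
  return (op == PLDL1KEEP) || (op == PLDL1STRM) || (op == PSTL3STRM);
}

// The int overload does the real work. The hint field is five bits wide,
// so any value in [0, 32) is encodable, including unallocated hints that
// have no named enumerator.
void prfm(int op) {
  assert((op >= 0) && (op < 32));
  // ... encode `op` into the instruction's hint field ...
}

// The typed overload accepts only named values, so an out-of-range integer
// is never converted into the enum type (undefined behaviour in C++).
void prfm(PrefetchOperation op) {
  assert(IsNamedPrefetchOperation(op));
  prfm(static_cast<int>(op));
}

int main() {
  prfm(PLDL1KEEP);  // A named hint goes through the typed overload.
  prfm(0x18);       // An unallocated hint uses the int overload directly.
  return 0;
}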
@@ -1635,17 +1647,18 @@ void Assembler::ldlar(const Register& rt, const MemOperand& src) {
   V(casal, CASAL)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const Register& rs, \
                      const Register& rt, \
                      const MemOperand& src) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
     VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
+    VIXL_ASSERT(AreSameFormat(rs, rt)); \
     LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
     Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
   }
-COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_W_X_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define COMPARE_AND_SWAP_W_LIST(V) \
@@ -1659,7 +1672,7 @@ COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
   V(casalh, CASALH)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const Register& rs, \
                      const Register& rt, \
                      const MemOperand& src) { \
@@ -1667,8 +1680,8 @@ COMPARE_AND_SWAP_W_X_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
     Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
   }
-COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_W_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -1679,7 +1692,7 @@ COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
   V(caspal, CASPAL)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const Register& rs, \
                      const Register& rs1, \
                      const Register& rt, \
@@ -1691,11 +1704,12 @@ COMPARE_AND_SWAP_W_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(AreEven(rs, rt)); \
     VIXL_ASSERT(AreConsecutive(rs, rs1)); \
     VIXL_ASSERT(AreConsecutive(rt, rt1)); \
+    VIXL_ASSERT(AreSameFormat(rs, rs1, rt, rt1)); \
     LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \
     Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \
   }
-COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+COMPARE_AND_SWAP_PAIR_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // These macros generate all the variations of the atomic memory operations,
 // e.g. ldadd, ldadda, ldaddb, staddl, etc.
@@ -1846,7 +1860,7 @@ void Assembler::ldapursw(const Register& rt, const MemOperand& src) {
   Emit(LDAPURSW | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
 }

-void Assembler::prfm(PrefetchOperation op,
+void Assembler::prfm(int op,
                      const MemOperand& address,
                      LoadStoreScalingOption option) {
   VIXL_ASSERT(option != RequireUnscaledOffset);
@@ -1854,8 +1868,16 @@ void Assembler::prfm(PrefetchOperation op,
   Prefetch(op, address, option);
 }

+void Assembler::prfm(PrefetchOperation op,
+                     const MemOperand& address,
+                     LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), address, option);
+}

-void Assembler::prfum(PrefetchOperation op,
+
+void Assembler::prfum(int op,
                       const MemOperand& address,
                       LoadStoreScalingOption option) {
   VIXL_ASSERT(option != RequireScaledOffset);
@@ -1863,11 +1885,25 @@ void Assembler::prfum(PrefetchOperation op,
   Prefetch(op, address, option);
 }

+void Assembler::prfum(PrefetchOperation op,
+                      const MemOperand& address,
+                      LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfum(static_cast<int>(op), address, option);
+}

-void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+
+void Assembler::prfm(int op, RawLiteral* literal) {
   prfm(op, static_cast<int>(LinkAndGetWordOffsetTo(literal)));
 }

+void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), literal);
+}
+
 void Assembler::sys(int op1, int crn, int crm, int op2, const Register& xt) {
   VIXL_ASSERT(xt.Is64Bits());
@@ -1933,6 +1969,7 @@ void Assembler::LoadStoreStructVerify(const VRegister& vt,
   // Assert that addressing mode is either offset (with immediate 0), post
   // index by immediate of the size of the register list, or post index by a
   // value in a core register.
+  VIXL_ASSERT(vt.HasSize() && vt.HasLaneSize());
   if (addr.IsImmediateOffset()) {
     VIXL_ASSERT(addr.GetOffset() == 0);
   } else {
@@ -2290,6 +2327,7 @@ void Assembler::LoadStoreStructSingle(const VRegister& vt,
   // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
   // number of lanes, and T is b, h, s or d.
   unsigned lane_size = vt.GetLaneSizeInBytes();
+  VIXL_ASSERT(lane_size > 0);
   VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size));

   // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
@@ -2424,7 +2462,7 @@ void Assembler::NEON3DifferentHN(const VRegister& vd,
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -2432,8 +2470,8 @@ void Assembler::NEON3DifferentHN(const VRegister& vd,
     VIXL_ASSERT(AS); \
     NEON3DifferentL(vd, vn, vm, OP); \
   }
-NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3DIFF_LONG_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define NEON_3DIFF_HN_LIST(V) \
@@ -2447,7 +2485,7 @@ NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
   V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -2455,8 +2493,8 @@ NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(AS); \
     NEON3DifferentHN(vd, vn, vm, OP); \
   }
-NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3DIFF_HN_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::uaddw(const VRegister& vd,
                       const VRegister& vn,
@@ -3104,7 +3142,7 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd,
   V(fcvtau, NEON_FCVTAU, FCVTAU) \
   V(fcvtas, NEON_FCVTAS, FCVTAS)

-#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
   void Assembler::FN(const Register& rd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
     if (vn.IsH()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \
@@ -3119,8 +3157,8 @@ void Assembler::NEONFP16ConvertToInt(const VRegister& vd,
       NEONFPConvertToInt(vd, vn, VEC_OP); \
     } \
   }
-NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
-#undef DEFINE_ASM_FUNCS
+NEON_FP2REGMISC_FCVT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
@@ -3308,7 +3346,7 @@ void Assembler::NEON3SameFP16(const VRegister& vd,
   V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar, NEON_FRECPE_H_scalar)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
     Instr op; \
@@ -3348,8 +3386,8 @@ void Assembler::NEON3SameFP16(const VRegister& vd,
       NEONFP2RegMisc(vd, vn, op); \
     } \
   }
-NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP2REGMISC_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define NEON_FP2REGMISC_V85_LIST(V) \
@@ -3359,7 +3397,7 @@ NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
   V(frint64z, NEON_FRINT64Z, FRINT64Z)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt)); \
     Instr op; \
@@ -3373,8 +3411,8 @@ NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
     } \
     NEONFP2RegMisc(vd, vn, op); \
   }
-NEON_FP2REGMISC_V85_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP2REGMISC_V85_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::NEONFP2RegMiscFP16(const VRegister& vd,
                                    const VRegister& vn,
@@ -3638,7 +3676,7 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
   V(uqrshl, NEON_UQRSHL, true)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -3646,8 +3684,8 @@ void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
     VIXL_ASSERT(AS); \
     NEON3Same(vd, vn, vm, OP); \
   }
-NEON_3SAME_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_3SAME_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
 #define NEON_FP3SAME_OP_LIST(V) \
@@ -3680,7 +3718,7 @@ NEON_3SAME_LIST(DEFINE_ASM_FUNC)
 // TODO: This macro is complicated because it classifies the instructions in the
 // macro list above, and treats each case differently. It could be somewhat
 // simpler if we were to split the macro, at the cost of some duplication.
-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -3720,8 +3758,8 @@ NEON_3SAME_LIST(DEFINE_ASM_FUNC)
       NEONFP3Same(vd, vn, vm, op); \
     } \
   }
-NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FP3SAME_OP_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -3732,7 +3770,7 @@ NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
   V(fmlsl2, NEON_FMLSL2)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, VEC_OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm) { \
@@ -3744,8 +3782,8 @@ NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
                 (vd.Is4S() && vn.Is4H() && vm.Is4H())); \
     Emit(FPFormat(vd) | VEC_OP | Rm(vm) | Rn(vn) | Rd(vd)); \
   }
-NEON_FHM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FHM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::addp(const VRegister& vd, const VRegister& vn) {
@@ -4138,7 +4176,7 @@ void Assembler::udot(const VRegister& vd,
   V(sqrdmulh, NEON_SQRDMULH_byelement, true) \
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4147,8 +4185,8 @@ void Assembler::udot(const VRegister& vd,
     VIXL_ASSERT(AS); \
     NEONByElement(vd, vn, vm, vm_index, OP); \
   }
-NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4157,7 +4195,7 @@ NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
   V(sqrdmlsh, NEON_SQRDMLSH_byelement)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4165,8 +4203,8 @@ NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM)); \
     NEONByElement(vd, vn, vm, vm_index, OP); \
   }
-NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_RDM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4177,7 +4215,7 @@ NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
   V(fmulx, NEON_FMULX_byelement, NEON_FMULX_H_byelement)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4186,8 +4224,8 @@ NEON_BYELEMENT_RDM_LIST(DEFINE_ASM_FUNC)
     if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
     NEONFPByElement(vd, vn, vm, vm_index, OP, OP_H); \
   }
-NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_FPBYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4213,7 +4251,7 @@ NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4222,8 +4260,8 @@ NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
     VIXL_ASSERT(AS); \
     NEONByElementL(vd, vn, vm, vm_index, OP); \
   }
-NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_LONG_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4235,7 +4273,7 @@ NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const VRegister& vd, \
                      const VRegister& vn, \
                      const VRegister& vm, \
@@ -4252,8 +4290,8 @@ NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
     Emit(FPFormat(vd) | OP | Rd(vd) | Rn(vn) | Rm(vm) | \
          ImmNEONHLM(vm_index, 3)); \
   }
-NEON_BYELEMENT_FHM_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_BYELEMENT_FHM_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
   VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
@@ -4763,13 +4801,13 @@ void Assembler::NEONAcrossLanes(const VRegister& vd,
   V(uminv, NEON_UMINV)
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
     NEONAcrossLanes(vd, vn, OP, 0); \
   }
-NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_ACROSSLANES_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 // clang-format off
@@ -4780,15 +4818,15 @@ NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
   V(fminnmv, NEON_FMINNMV, NEON_FMINNMV_H) \
 // clang-format on

-#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
+#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \
   void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
     VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \
     if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
     VIXL_ASSERT(vd.Is1S() || vd.Is1H()); \
     NEONAcrossLanes(vd, vn, OP, OP_H); \
   }
-NEON_ACROSSLANES_FP_LIST(DEFINE_ASM_FUNC)
-#undef DEFINE_ASM_FUNC
+NEON_ACROSSLANES_FP_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC

 void Assembler::NEONPerm(const VRegister& vd,
@@ -4870,9 +4908,9 @@ void Assembler::NEONShiftLeftImmediate(const VRegister& vd,
                                        const VRegister& vn,
                                        int shift,
                                        NEONShiftImmediateOp op) {
-  int laneSizeInBits = vn.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
-  NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
+  int lane_size_in_bits = vn.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+  NEONShiftImmediate(vd, vn, op, (lane_size_in_bits + shift) << 16);
 }

@@ -4880,9 +4918,9 @@ void Assembler::NEONShiftRightImmediate(const VRegister& vd,
                                         const VRegister& vn,
                                         int shift,
                                         NEONShiftImmediateOp op) {
-  int laneSizeInBits = vn.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
-  NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
+  int lane_size_in_bits = vn.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits));
+  NEONShiftImmediate(vd, vn, op, ((2 * lane_size_in_bits) - shift) << 16);
 }

@@ -4890,9 +4928,9 @@ void Assembler::NEONShiftImmediateL(const VRegister& vd,
                                     const VRegister& vn,
                                     int shift,
                                     NEONShiftImmediateOp op) {
-  int laneSizeInBits = vn.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 0) && (shift < laneSizeInBits));
-  int immh_immb = (laneSizeInBits + shift) << 16;
+  int lane_size_in_bits = vn.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+  int immh_immb = (lane_size_in_bits + shift) << 16;

   VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
               (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
@@ -4908,9 +4946,9 @@ void Assembler::NEONShiftImmediateN(const VRegister& vd,
                                     int shift,
                                     NEONShiftImmediateOp op) {
   Instr q, scalar;
-  int laneSizeInBits = vd.GetLaneSizeInBits();
-  VIXL_ASSERT((shift >= 1) && (shift <= laneSizeInBits));
-  int immh_immb = (2 * laneSizeInBits - shift) << 16;
+  int lane_size_in_bits = vd.GetLaneSizeInBits();
+  VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits));
+  int immh_immb = (2 * lane_size_in_bits - shift) << 16;

   if (vn.IsScalar()) {
     VIXL_ASSERT((vd.Is1B() && vn.Is1H()) ||
                 (vd.Is1H() && vn.Is1S()) ||
@@ -5271,6 +5309,7 @@ void Assembler::MoveWide(const Register& rd,
   } else {
     // Calculate a new immediate and shift combination to encode the immediate
     // argument.
+    VIXL_ASSERT(shift == -1);
     shift = 0;
     if ((imm & 0xffffffffffff0000) == 0) {
       // Nothing to do.
@@ -5604,7 +5643,7 @@ void Assembler::DataProcExtendedRegister(const Register& rd,

 Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
-                                     unsigned access_size,
+                                     unsigned access_size_in_bytes_log2,
                                      LoadStoreScalingOption option) {
   Instr base = RnSP(addr.GetBaseRegister());
   int64_t offset = addr.GetOffset();
@@ -5614,21 +5653,22 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
                             (option == PreferUnscaledOffset) ||
                             (option == RequireUnscaledOffset);
     if (prefer_unscaled && IsImmLSUnscaled(offset)) {
       // Use the unscaled addressing mode.
-      return base | LoadStoreUnscaledOffsetFixed |
-             ImmLS(static_cast<int>(offset));
+      return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
     }

     if ((option != RequireUnscaledOffset) &&
-        IsImmLSScaled(offset, access_size)) {
+        IsImmLSScaled(offset, access_size_in_bytes_log2)) {
+      // We need `offset` to be positive for the shift to be well-defined.
+      // IsImmLSScaled should check this.
+      VIXL_ASSERT(offset >= 0);
       // Use the scaled addressing mode.
       return base | LoadStoreUnsignedOffsetFixed |
-             ImmLSUnsigned(static_cast<int>(offset) >> access_size);
+             ImmLSUnsigned(offset >> access_size_in_bytes_log2);
     }

     if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) {
       // Use the unscaled addressing mode.
-      return base | LoadStoreUnscaledOffsetFixed |
-             ImmLS(static_cast<int>(offset));
+      return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset);
     }
   }
@@ -5649,17 +5689,17 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,

   // Shifts are encoded in one bit, indicating a left shift by the memory
   // access size.
-  VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size));
+  VIXL_ASSERT((shift_amount == 0) ||
+              (shift_amount == access_size_in_bytes_log2));
   return base | LoadStoreRegisterOffsetFixed | Rm(addr.GetRegisterOffset()) |
          ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0);
 }

 if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) {
-    return base | LoadStorePreIndexFixed | ImmLS(static_cast<int>(offset));
+    return base | LoadStorePreIndexFixed | ImmLS(offset);
   }

   if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) {
-    return base | LoadStorePostIndexFixed | ImmLS(static_cast<int>(offset));
+    return base | LoadStorePostIndexFixed | ImmLS(offset);
   }

   // If this point is reached, the MemOperand (addr) cannot be encoded.
@@ -5694,7 +5734,7 @@ void Assembler::LoadStorePAC(const Register& xt,
 }

-void Assembler::Prefetch(PrefetchOperation op,
+void Assembler::Prefetch(int op,
                          const MemOperand& addr,
                          LoadStoreScalingOption option) {
   VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset());
@@ -5703,6 +5743,14 @@ void Assembler::Prefetch(PrefetchOperation op,
   Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option));
 }

+void Assembler::Prefetch(PrefetchOperation op,
+                         const MemOperand& addr,
+                         LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  Prefetch(static_cast<int>(op), addr, option);
+}
+
 bool Assembler::IsImmAddSub(int64_t immediate) {
   return IsUint12(immediate) ||
@@ -5788,17 +5836,17 @@ bool Assembler::IsImmFP64(double imm) {
 }

-bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size) {
-  VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
-  return IsMultiple(offset, 1 << access_size) &&
-         IsInt7(offset / (1 << access_size));
+bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2) {
+  VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+  return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
+         IsInt7(offset / (1 << access_size_in_bytes_log2));
 }

-bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size) {
-  VIXL_ASSERT(access_size <= kQRegSizeInBytesLog2);
-  return IsMultiple(offset, 1 << access_size) &&
-         IsUint12(offset / (1 << access_size));
+bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2) {
+  VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2);
+  return IsMultiple(offset, 1 << access_size_in_bytes_log2) &&
+         IsUint12(offset / (1 << access_size_in_bytes_log2));
 }
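The new assert in LoadStoreMemOperand documents why the scaled branch may shift the offset: IsImmLSScaled only accepts non-negative multiples of the access size, so `offset >> access_size_in_bytes_log2` is never applied to a negative value. A simplified sketch of the two predicates' contract, written independently of VIXL's IsMultiple/IsUint12/IsInt9 helpers:

#include <cstdint>

// Scaled, unsigned 12-bit form: the offset must be a non-negative multiple
// of the access size, which makes the later shift well-defined.
bool IsScaledOffset(int64_t offset, unsigned access_size_in_bytes_log2) {
  int64_t unit = int64_t{1} << access_size_in_bytes_log2;
  if ((offset % unit) != 0) return false;
  int64_t scaled = offset / unit;
  return (scaled >= 0) && (scaled < (int64_t{1} << 12));
}

// Unscaled, signed 9-bit form: any byte offset in [-256, 255].
bool IsUnscaledOffset(int64_t offset) {
  return (offset >= -256) && (offset <= 255);
}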
@@ -5832,7 +5880,8 @@ bool Assembler::IsImmLogical(uint64_t value,
                              unsigned* n,
                              unsigned* imm_s,
                              unsigned* imm_r) {
-  VIXL_ASSERT((width == kWRegSize) || (width == kXRegSize));
+  VIXL_ASSERT((width == kBRegSize) || (width == kHRegSize) ||
+              (width == kSRegSize) || (width == kDRegSize));

   bool negate = false;
@@ -5873,16 +5922,18 @@ bool Assembler::IsImmLogical(uint64_t value,
     value = ~value;
   }

-  if (width == kWRegSize) {
-    // To handle 32-bit logical immediates, the very easiest thing is to repeat
-    // the input value twice to make a 64-bit word. The correct encoding of that
-    // as a logical immediate will also be the correct encoding of the 32-bit
-    // value.
+  if (width <= kWRegSize) {
+    // To handle 8/16/32-bit logical immediates, the very easiest thing is to repeat
+    // the input value to fill a 64-bit word. The correct encoding of that as a
+    // logical immediate will also be the correct encoding of the value.

-    // Avoid making the assumption that the most-significant 32 bits are zero by
+    // Avoid making the assumption that the most-significant 56/48/32 bits are zero by
     // shifting the value left and duplicating it.
-    value <<= kWRegSize;
-    value |= value >> kWRegSize;
+    for (unsigned bits = width; bits <= kWRegSize; bits *= 2) {
+      value <<= bits;
+      uint64_t mask = (UINT64_C(1) << bits) - 1;
+      value |= ((value >> bits) & mask);
+    }
   }

   // The basic analysis idea: imagine our input word looks like this.
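The loop above generalises the old 32-bit doubling: starting from the low `width` bits, it doubles the replicated pattern until the full doubleword is filled, masking each step so that stale high bits never leak in. The same idea pulled out of context; ReplicateToXReg is not a VIXL function, just a standalone sketch:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Replicate the low `width` bits of `value` across a 64-bit word so that an
// 8/16/32-bit logical immediate can be analysed with the 64-bit rules.
uint64_t ReplicateToXReg(uint64_t value, unsigned width) {
  assert((width == 8) || (width == 16) || (width == 32));
  for (unsigned bits = width; bits <= 32; bits *= 2) {
    value <<= bits;
    uint64_t mask = (UINT64_C(1) << bits) - 1;
    value |= ((value >> bits) & mask);
  }
  return value;
}

int main() {
  // The byte 0x3c fills the doubleword: prints 3c3c3c3c3c3c3c3c.
  std::printf("%016llx\n",
              (unsigned long long)ReplicateToXReg(0x3c, 8));
  return 0;
}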
-bool AreSameFormat(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3, - const VRegister& reg4) { - VIXL_ASSERT(reg1.IsValid()); - bool match = true; - match &= !reg2.IsValid() || reg2.IsSameFormat(reg1); - match &= !reg3.IsValid() || reg3.IsSameFormat(reg1); - match &= !reg4.IsValid() || reg4.IsSameFormat(reg1); - return match; -} - - -bool AreConsecutive(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3, - const VRegister& reg4) { - VIXL_ASSERT(reg1.IsValid()); - - if (!reg2.IsValid()) { - return true; - } else if (reg2.GetCode() != ((reg1.GetCode() + 1) % kNumberOfVRegisters)) { - return false; - } - - if (!reg3.IsValid()) { - return true; - } else if (reg3.GetCode() != ((reg2.GetCode() + 1) % kNumberOfVRegisters)) { - return false; - } - - if (!reg4.IsValid()) { - return true; - } else if (reg4.GetCode() != ((reg3.GetCode() + 1) % kNumberOfVRegisters)) { - return false; - } - - return true; -} } // namespace aarch64 } // namespace vixl diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index 3ccda1a6..f7aafd07 100644 --- a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -1089,18 +1089,6 @@ class Assembler : public vixl::internal::AssemblerBase { // zero [Armv8.3]. void pacdza(const Register& xd); - // Pointer Authentication Code for Data address, using key A, with address in - // x17 and modifier in x16 [Armv8.3]. - void pacda1716(); - - // Pointer Authentication Code for Data address, using key A, with address in - // LR and modifier in SP [Armv8.3]. - void pacdasp(); - - // Pointer Authentication Code for Data address, using key A, with address in - // LR and a modifier of zero [Armv8.3]. - void pacdaz(); - // Pointer Authentication Code for Data address, using key B [Armv8.3]. void pacdb(const Register& xd, const Register& xn); @@ -1108,18 +1096,6 @@ class Assembler : public vixl::internal::AssemblerBase { // zero [Armv8.3]. void pacdzb(const Register& xd); - // Pointer Authentication Code for Data address, using key B, with address in - // x17 and modifier in x16 [Armv8.3]. - void pacdb1716(); - - // Pointer Authentication Code for Data address, using key B, with address in - // LR and modifier in SP [Armv8.3]. - void pacdbsp(); - - // Pointer Authentication Code for Data address, using key B, with address in - // LR and a modifier of zero [Armv8.3]. - void pacdbz(); - // Pointer Authentication Code, using Generic key [Armv8.3]. void pacga(const Register& xd, const Register& xn, const Register& xm); @@ -1167,36 +1143,12 @@ class Assembler : public vixl::internal::AssemblerBase { // Authenticate Data address, using key A and a modifier of zero [Armv8.3]. void autdza(const Register& xd); - // Authenticate Data address, using key A, with address in x17 and modifier in - // x16 [Armv8.3]. - void autda1716(); - - // Authenticate Data address, using key A, with address in LR and modifier in - // SP [Armv8.3]. - void autdasp(); - - // Authenticate Data address, using key A, with address in LR and a modifier - // of zero [Armv8.3]. - void autdaz(); - // Authenticate Data address, using key B [Armv8.3]. void autdb(const Register& xd, const Register& xn); // Authenticate Data address, using key B and a modifier of zero [Armv8.3]. void autdzb(const Register& xd); - // Authenticate Data address, using key B, with address in x17 and modifier in - // x16 [Armv8.3]. - void autdb1716(); - - // Authenticate Data address, using key B, with address in LR and modifier in - // SP [Armv8.3]. 
- void autdbsp(); - - // Authenticate Data address, using key B, with address in LR and a modifier - // of zero [Armv8.3]. - void autdbz(); - // Strip Pointer Authentication Code of Data address [Armv8.3]. void xpacd(const Register& xd); @@ -2112,6 +2064,22 @@ class Assembler : public vixl::internal::AssemblerBase { // Prefetch from pc + imm19 << 2. void prfm(PrefetchOperation op, int64_t imm19); + // Prefetch memory (allowing unallocated hints). + void prfm(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + // Prefetch memory (with unscaled offset, allowing unallocated hints). + void prfum(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Prefetch memory in the literal pool (allowing unallocated hints). + void prfm(int op, RawLiteral* literal); + + // Prefetch from pc + imm19 << 2 (allowing unallocated hints). + void prfm(int op, int64_t imm19); + // Move instructions. The default shift of -1 indicates that the move // instruction will calculate an appropriate 16-bit immediate and left shift // that is equal to the 64-bit immediate argument. If an explicit left shift @@ -3618,6 +3586,2240 @@ class Assembler : public vixl::internal::AssemblerBase { const VRegister& vm, int rot); + // Scalable Vector Extensions. + + // Absolute value (predicated). + void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Add vectors (predicated). + void add(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Add vectors (unpredicated). + void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add immediate (unpredicated). + void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Add multiple of predicate register size to scalar register. + void addpl(const Register& xd, const Register& xn, int imm6); + + // Add multiple of vector register size to scalar register. + void addvl(const Register& xd, const Register& xn, int imm6); + + // Compute vector address. + void adr(const ZRegister& zd, const SVEMemOperand& addr); + + // Bitwise AND predicates. + void and_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise AND vectors (predicated). + void and_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise AND with immediate (unpredicated). + void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise AND vectors (unpredicated). + void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise AND predicates. + void ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise AND reduction to scalar. + void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Arithmetic shift right by immediate (predicated). + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Arithmetic shift right by 64-bit wide elements (predicated). + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Arithmetic shift right by immediate (unpredicated). + void asr(const ZRegister& zd, const ZRegister& zn, int shift); + + // Arithmetic shift right by 64-bit wide elements (unpredicated). 
+ void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Arithmetic shift right for divide by immediate (predicated). + void asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Reversed arithmetic shift right by vector (predicated). + void asrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear predicates. + void bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise clear vectors (predicated). + void bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear bits using immediate (unpredicated). + void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise clear vectors (unpredicated). + void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise clear predicates. + void bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition. + void brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Break after first true condition. + void brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Break before first true condition. + void brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Break before first true condition. + void brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Propagate break to next partition. + void brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Propagate break to next partition. + void brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition, propagating from previous partition. + void brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition, propagating from previous partition. + void brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break before first true condition, propagating from previous partition. + void brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break before first true condition, propagating from previous partition. + void brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Conditionally extract element after last to general-purpose register. + void clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm); + + // Conditionally extract element after last to SIMD&FP scalar register. + void clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Conditionally extract element after last to vector register. 
+ void clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Conditionally extract last element to general-purpose register. + void clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm); + + // Conditionally extract last element to SIMD&FP scalar register. + void clastb(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Conditionally extract last element to vector register. + void clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Count leading sign bits (predicated). + void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Count leading zero bits (predicated). + void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + void cmp(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to 64-bit wide elements. + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. 
+ void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Logically invert boolean condition in vector (predicated). + void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Count non-zero bits (predicated). + void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Set scalar to multiple of predicate constraint element count. + void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to multiple of predicate constraint element count. + void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to multiple of predicate constraint element count. + void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to active predicate element count. + void cntp(const Register& xd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Set scalar to multiple of predicate constraint element count. + void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Shuffle active elements of vector to the right and fill with zero. + void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); + + // Copy signed integer immediate to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); + + // Copy general-purpose register to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn); + + // Copy SIMD&FP scalar register to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); + + // Compare and terminate loop. + void ctermeq(const Register& rn, const Register& rm); + + // Compare and terminate loop. + void ctermne(const Register& rn, const Register& rm); + + // Decrement scalar by multiple of predicate constraint element count. + void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by multiple of predicate constraint element count. + void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. + void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by multiple of predicate constraint element count. + void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. + void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by active predicate element count. + void decp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Decrement vector by active predicate element count. + void decp(const ZRegister& zdn, const PRegister& pg); + + // Decrement scalar by multiple of predicate constraint element count. + void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. 
+ void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Broadcast general-purpose register to vector elements (unpredicated). + void dup(const ZRegister& zd, const Register& xn); + + // Broadcast indexed element to vector (unpredicated). + void dup(const ZRegister& zd, const ZRegister& zn, unsigned index); + + // As for movz/movk/movn, if the default shift of -1 is specified to dup, the + // assembler will pick an appropriate immediate and left shift that is + // equivalent to the immediate argument. If an explicit left shift is + // specified (0 or 8), the immediate must be a signed 8-bit integer. + + // Broadcast signed immediate to vector elements (unpredicated). + void dup(const ZRegister& zd, int imm8, int shift = -1); + + // Broadcast logical bitmask immediate to vector (unpredicated). + void dupm(const ZRegister& zd, uint64_t imm); + + // Bitwise exclusive OR with inverted immediate (unpredicated). + void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise exclusive OR predicates. + void eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise exclusive OR vectors (predicated). + void eor(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise exclusive OR with immediate (unpredicated). + void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise exclusive OR vectors (unpredicated). + void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise exclusive OR predicates. + void eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise XOR reduction to scalar. + void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Extract vector from pair of vectors. + void ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset); + + // Floating-point absolute difference (predicated). + void fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point absolute value (predicated). + void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point absolute compare vectors. + void facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point absolute compare vectors. + void facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point add immediate (predicated). + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point add vector (predicated). + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point add vector (unpredicated). + void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point add strictly-ordered reduction, accumulating in scalar. + void fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Floating-point add recursive reduction to scalar. + void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point complex add with rotate (predicated). 
+ void fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Floating-point compare vector with zero. + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vector with zero. + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vector with zero. + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point complex multiply-add with rotate (predicated). + void fcmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Floating-point complex multiply-add by indexed values with rotate. + void fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Floating-point compare vector with zero. + void fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vector with zero. + void fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vector with zero. + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vectors. + void fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Copy floating-point immediate to vector elements (predicated). + void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm); + + // Copy half-precision floating-point immediate to vector elements + // (predicated). + void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) { + fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN)); + } + + // Floating-point convert precision (predicated). + void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point convert to signed integer, rounding toward zero + // (predicated). + void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point convert to unsigned integer, rounding toward zero + // (predicated). + void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point divide by vector (predicated). + void fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point reversed divide by vector (predicated). + void fdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Broadcast floating-point immediate to vector elements. + void fdup(const ZRegister& zd, double imm); + + // Broadcast half-precision floating-point immediate to vector elements. 
+ void fdup(const ZRegister& zd, Float16 imm) { + fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); + } + + // Floating-point exponential accelerator. + void fexpa(const ZRegister& zd, const ZRegister& zn); + + // Floating-point fused multiply-add vectors (predicated), writing + // multiplicand [Zdn = Za + Zdn * Zm]. + void fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point maximum with immediate (predicated). + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point maximum (predicated). + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum number with immediate (predicated). + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point maximum number (predicated). + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum number recursive reduction to scalar. + void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point maximum recursive reduction to scalar. + void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point minimum with immediate (predicated). + void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point minimum (predicated). + void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number with immediate (predicated). + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point minimum number (predicated). + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number recursive reduction to scalar. + void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point minimum recursive reduction to scalar. + void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point fused multiply-add vectors (predicated), writing addend + // [Zda = Zda + Zn * Zm]. + void fmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-add by indexed elements + // (Zda = Zda + Zn * Zm[indexed]). + void fmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Floating-point fused multiply-subtract vectors (predicated), writing + // addend [Zda = Zda + -Zn * Zm]. + void fmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-subtract by indexed elements + // (Zda = Zda + -Zn * Zm[indexed]). + void fmls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Move 8-bit floating-point immediate to vector elements (unpredicated). + void fmov(const ZRegister& zd, double imm); + + // Move 8-bit floating-point immediate to vector elements (predicated). + void fmov(const ZRegister& zd, const PRegisterM& pg, double imm); + + // Floating-point fused multiply-subtract vectors (predicated), writing + // multiplicand [Zdn = Za + -Zdn * Zm]. 
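The bracketed equations distinguish which register each fused form overwrites; the same multiply-add can be spelt either way, as in this sketch (illustrative; `assm` and register aliases assumed):

    // Addend form: z0 = z0 + z1 * z2 for active lanes.
    assm.fmla(z0.VnD(), p0.Merging(), z1.VnD(), z2.VnD());
    // Multiplicand form: z1 = z0 + z1 * z2; note the (zm, za) operand order.
    assm.fmad(z1.VnD(), p0.Merging(), z2.VnD(), z0.VnD());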
+ void fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point multiply by immediate (predicated). + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point multiply vectors (predicated). + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point multiply by indexed elements. + void fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index); + + // Floating-point multiply vectors (unpredicated). + void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point multiply-extended vectors (predicated). + void fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negate (predicated). + void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point negated fused multiply-add vectors (predicated), writing + // multiplicand [Zdn = -Za + -Zdn * Zm]. + void fnmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point negated fused multiply-add vectors (predicated), writing + // addend [Zda = -Zda + -Zn * Zm]. + void fnmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negated fused multiply-subtract vectors (predicated), + // writing addend [Zda = -Zda + Zn * Zm]. + void fnmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negated fused multiply-subtract vectors (predicated), + // writing multiplicand [Zdn = -Za + Zdn * Zm]. + void fnmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point reciprocal estimate (unpredicated). + void frecpe(const ZRegister& zd, const ZRegister& zn); + + // Floating-point reciprocal step (unpredicated). + void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point reciprocal exponent (predicated). + void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, to nearest with ties away from + // zero (predicated). + void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, using the current rounding mode + // (predicated). + void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, toward minus infinity + // (predicated). + void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, to nearest with ties to even + // (predicated). + void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, toward plus infinity + // (predicated). + void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, using the current rounding mode + // and signalling inexactness (predicated). + void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value, toward zero (predicated). + void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point reciprocal square root estimate (unpredicated). + void frsqrte(const ZRegister& zd, const ZRegister& zn); + + // Floating-point reciprocal square root step (unpredicated). 
+ void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point adjust exponent by vector (predicated). + void fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point square root (predicated). + void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point subtract immediate (predicated). + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point subtract vectors (predicated). + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point subtract vectors (unpredicated). + void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point reversed subtract from immediate (predicated). + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point reversed subtract vectors (predicated). + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point trigonometric multiply-add coefficient. + void ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3); + + // Floating-point trigonometric starting value. + void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point trigonometric select coefficient. + void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Increment scalar by multiple of predicate constraint element count. + void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by multiple of predicate constraint element count. + void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by multiple of predicate constraint element count. + void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by active predicate element count. + void incp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Increment vector by active predicate element count. + void incp(const ZRegister& zdn, const PRegister& pg); + + // Increment scalar by multiple of predicate constraint element count. + void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Create index starting from and incremented by immediate. + void index(const ZRegister& zd, int start, int step); + + // Create index starting from and incremented by general-purpose register. + void index(const ZRegister& zd, const Register& rn, const Register& rm); + + // Create index starting from general-purpose register and incremented by + // immediate. + void index(const ZRegister& zd, const Register& rn, int imm5); + + // Create index starting from immediate and incremented by general-purpose + // register. + void index(const ZRegister& zd, int imm5, const Register& rm); + + // Insert general-purpose register in shifted vector. 
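A sketch combining index with the insr form declared below, a common way to build small test vectors (illustrative; `assm` assumed):

    assm.index(z0.VnS(), 0, 1);   // z0.s = {0, 1, 2, ...}.
    assm.insr(z0.VnS(), w1);      // Shift z0 up one lane; w1 enters lane 0.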
+ void insr(const ZRegister& zdn, const Register& rm); + + // Insert SIMD&FP scalar register in shifted vector. + void insr(const ZRegister& zdn, const VRegister& vm); + + // Extract element after last to general-purpose register. + void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn); + + // Extract element after last to SIMD&FP scalar register. + void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Extract last element to general-purpose register. + void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn); + + // Extract last element to SIMD&FP scalar register. + void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Contiguous/gather load bytes to vector. + void ld1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load halfwords to vector. + void ld1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load words to vector. + void ld1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load doublewords to vector. + void ld1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // TODO: Merge other loads into the SVEMemOperand versions. + + // Load and broadcast unsigned byte to vector. + void ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast unsigned halfword to vector. + void ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast unsigned word to vector. + void ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast doubleword to vector. + void ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate sixteen bytes. + void ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate eight halfwords. + void ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate four words. + void ld1rqw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate two doublewords. + void ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed byte to vector. + void ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed halfword to vector. + void ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed word to vector. + void ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed bytes to vector. + void ld1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed halfwords to vector. + void ld1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed words to vector. + void ld1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // TODO: Merge other loads into the SVEMemOperand versions. + + // Contiguous load two-byte structures to two vectors. 
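A de-interleaving sketch for the structure loads declared below; the immediate offset in SVEMemOperand is in vector-length units and, for ldN forms, must be a multiple of N (illustrative; `assm` and buffer setup assumed):

    assm.ld2b(z0.VnB(), z1.VnB(), p0.Zeroing(), SVEMemOperand(x0));
    assm.ld2b(z2.VnB(), z3.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2, SVE_MUL_VL));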
+ void ld2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-halfword structures to two vectors. + void ld2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-word structures to two vectors. + void ld2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-doubleword structures to two vectors. + void ld2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-byte structures to three vectors. + void ld3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-halfword structures to three vectors. + void ld3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-word structures to three vectors. + void ld3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-doubleword structures to three vectors. + void ld3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-byte structures to four vectors. + void ld4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-halfword structures to four vectors. + void ld4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-word structures to four vectors. + void ld4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-doubleword structures to four vectors. + void ld4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned bytes to vector. + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned halfwords to vector. + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned words to vector. + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault doublewords to vector. + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed bytes to vector. + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed halfwords to vector. + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed words to vector. + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Gather load first-fault unsigned bytes to vector. 
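The first-fault loads pair with the FFR instructions declared further down (setffr, rdffr); a speculative-load step might look like this sketch (illustrative; `assm`, the register aliases, and the scalar-plus-scalar SVEMemOperand form are assumptions here):

    assm.setffr();                                               // Mark every element good.
    assm.ldff1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));  // May stop short of a fault.
    assm.rdffr(p1.VnB(), p0.Zeroing());                          // Which elements loaded?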
+ void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned bytes to vector (immediate index). + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault doublewords to vector (vector index). + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault doublewords to vector (immediate index). + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault unsigned halfwords to vector (vector index). + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned halfwords to vector (immediate index). + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed bytes to vector (vector index). + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed bytes to vector (immediate index). + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed halfwords to vector (vector index). + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed halfwords to vector (immediate index). + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed words to vector (vector index). + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed words to vector (immediate index). + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault unsigned words to vector (vector index). + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned words to vector (immediate index). + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Contiguous load non-fault unsigned bytes to vector (immediate index). + void ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault doublewords to vector (immediate index). + void ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault unsigned halfwords to vector (immediate + // index). + void ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed bytes to vector (immediate index). + void ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed halfwords to vector (immediate index). + void ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed words to vector (immediate index). + void ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault unsigned words to vector (immediate index). 
+ void ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal bytes to vector. + void ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal halfwords to vector. + void ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal words to vector. + void ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal doublewords to vector. + void ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load SVE predicate/vector register. + void ldr(const CPURegister& rt, const SVEMemOperand& addr); + + // Logical shift left by immediate (predicated). + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Logical shift left by 64-bit wide elements (predicated). + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift left by immediate (unpredicated). + void lsl(const ZRegister& zd, const ZRegister& zn, int shift); + + // Logical shift left by 64-bit wide elements (unpredicated). + void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Reversed logical shift left by vector (predicated). + void lslr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift right by immediate (predicated). + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Logical shift right by 64-bit wide elements (predicated). + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift right by immediate (unpredicated). + void lsr(const ZRegister& zd, const ZRegister& zn, int shift); + + // Logical shift right by 64-bit wide elements (unpredicated). + void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Reversed logical shift right by vector (predicated). + void lsrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise invert predicate. + void not_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Bitwise invert predicate, setting the condition flags. + void nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Multiply-add vectors (predicated), writing multiplicand + // [Zdn = Za + Zdn * Zm]. + void mad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Multiply-add vectors (predicated), writing addend + // [Zda = Zda + Zn * Zm]. + void mla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply-subtract vectors (predicated), writing addend + // [Zda = Zda - Zn * Zm]. 
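As with the floating-point forms, the integer fused operations differ only in which register they overwrite, as in this sketch (illustrative; `assm` assumed):

    assm.mla(z0.VnH(), p0.Merging(), z1.VnH(), z2.VnH());  // z0 = z0 + z1 * z2.
    assm.mls(z0.VnH(), p0.Merging(), z1.VnH(), z2.VnH());  // z0 = z0 - z1 * z2.
    assm.mad(z1.VnH(), p0.Merging(), z2.VnH(), z0.VnH());  // z1 = z0 + z1 * z2.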
+ void mls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Move predicates (unpredicated). + void mov(const PRegister& pd, const PRegister& pn); + + // Move predicates (merging). + void mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn); + + // Move predicates (zeroing). + void mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Move general-purpose register to vector elements (unpredicated). + void mov(const ZRegister& zd, const Register& xn); + + // Move SIMD&FP scalar register to vector elements (unpredicated). + void mov(const ZRegister& zd, const VRegister& vn); + + // Move vector register (unpredicated). + void mov(const ZRegister& zd, const ZRegister& zn); + + // Move indexed element to vector elements (unpredicated). + void mov(const ZRegister& zd, const ZRegister& zn, unsigned index); + + // Move general-purpose register to vector elements (predicated). + void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn); + + // Move SIMD&FP scalar register to vector elements (predicated). + void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); + + // Move vector elements (predicated). + void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Move signed integer immediate to vector elements (predicated). + void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); + + // Move signed immediate to vector elements (unpredicated). + void mov(const ZRegister& zd, int imm8, int shift); + + // Move logical bitmask immediate to vector (unpredicated). + void mov(const ZRegister& zd, uint64_t imm); + + // Move predicates (unpredicated), setting the condition flags. + void movs(const PRegister& pd, const PRegister& pn); + + // Move predicates (zeroing), setting the condition flags. + void movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Move prefix (predicated). + void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); + + // Move prefix (unpredicated). + void movprfx(const ZRegister& zd, const ZRegister& zn); + + // Multiply-subtract vectors (predicated), writing multiplicand + // [Zdn = Za - Zdn * Zm]. + void msb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Multiply vectors (predicated). + void mul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply by immediate (unpredicated). + void mul(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Bitwise NAND predicates. + void nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise NAND predicates, setting the condition flags. + void nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Negate (predicated). + void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Bitwise NOR predicates. + void nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise NOR predicates, setting the condition flags. + void nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise invert vector (predicated). 
+ void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Bitwise OR inverted predicate. + void orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR inverted predicate, setting the condition flags. + void orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR with inverted immediate (unpredicated). + void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise OR predicate. + void orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR vectors (predicated). + void orr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise OR with immediate (unpredicated). + void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise OR vectors (unpredicated). + void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise OR predicate, setting the condition flags. + void orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR reduction to scalar. + void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Set all predicate elements to false. + void pfalse(const PRegisterWithLaneSize& pd); + + // Set the first active predicate element to true. + void pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Find next active predicate. + void pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Prefetch bytes. + void prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch halfwords. + void prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch words. + void prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch doublewords. + void prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Set condition flags for predicate. + void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn); + + // Initialise predicate from named constraint. + void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); + + // Initialise predicate from named constraint, setting the condition flags. + void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); + + // Unpack and widen half of predicate. + void punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn); + + // Unpack and widen half of predicate. + void punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn); + + // Reverse bits (predicated). + void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Read the first-fault register. + void rdffr(const PRegisterWithLaneSize& pd); + + // Return predicate of successfully loaded elements. + void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); + + // Return predicate of successfully loaded elements, setting the condition + // flags. + void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); + + // Read multiple of vector register size to scalar register. + void rdvl(const Register& xd, int imm6); + + // Reverse all elements in a predicate. 
+ void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn); + + // Reverse all elements in a vector (unpredicated). + void rev(const ZRegister& zd, const ZRegister& zn); + + // Reverse bytes within each element (predicated). + void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Reverse halfwords within each element (predicated). + void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Reverse words within each doubleword element (predicated). + void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed absolute difference (predicated). + void sabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed add reduction to scalar. + void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); + + // Signed integer convert to floating-point (predicated). + void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed divide (predicated). + void sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed reversed divide (predicated). + void sdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed dot product by indexed quadtuplet. + void sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed dot product. + void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Conditionally select elements from two predicates. + void sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Conditionally select elements from two vectors. + void sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Initialise the first-fault register to all true. + void setffr(); + + // Signed maximum vectors (predicated). + void smax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed maximum with immediate (unpredicated). + void smax(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Signed maximum reduction to scalar. + void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Signed minimum vectors (predicated). + void smin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed minimum with immediate (unpredicated). + void smin(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Signed minimum reduction to scalar. + void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Signed multiply returning high half (predicated). + void smulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Splice two vectors under predicate control. + void splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating add vectors (unpredicated). + void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating add immediate (unpredicated). + void sqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Signed saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. 
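The saturating counters clamp rather than wrap; a sketch of the scalar forms declared below (illustrative; `assm` assumed, patterns taken from the SVE_* constraint constants):

    assm.sqdecb(x0);                   // x0 -= B-lane count, saturating on overflow.
    assm.sqdecb(x1, w1, SVE_MUL3, 2);  // 32-bit form: saturate w1, sign-extend into x1.
    assm.sqdecw(x2, SVE_POW2);         // Count limited to a power-of-two lane count.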
+ void sqdecb(const Register& xd, + const Register& wn, + int pattern, + int multiplier); + + // Signed saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. + void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. + void sqdech(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. + void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 16-bit predicate + // constraint element count. + void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by active predicate element count. + void sqdecp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn); + + // Signed saturating decrement scalar by active predicate element count. + void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg); + + // Signed saturating decrement vector by active predicate element count. + void sqdecp(const ZRegister& zdn, const PRegister& pg); + + // Signed saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void sqincb(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void sqincd(const Register& xd, + const Register& wn, + int pattern, + int multiplier); + + // Signed saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 64-bit predicate + // constraint element count. + void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 16-bit predicate + // constraint element count. 
+ void sqinch(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 16-bit predicate + // constraint element count. + void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 16-bit predicate + // constraint element count. + void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by active predicate element count. + void sqincp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn); + + // Signed saturating increment scalar by active predicate element count. + void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg); + + // Signed saturating increment vector by active predicate element count. + void sqincp(const ZRegister& zdn, const PRegister& pg); + + // Signed saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void sqincw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 32-bit predicate + // constraint element count. + void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating subtract vectors (unpredicated). + void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating subtract immediate (unpredicated). + void sqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Contiguous/scatter store bytes from vector. + void st1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store halfwords from vector. + void st1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store words from vector. + void st1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store doublewords from vector. + void st1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-byte structures from two vectors. + void st2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-halfword structures from two vectors. + void st2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-word structures from two vectors. + void st2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-doubleword structures from two vectors. + void st2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-byte structures from three vectors. + void st3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-halfword structures from three vectors. + void st3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-word structures from three vectors. 
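A sketch for the three-register structure store declared below; as with the loads, the immediate offset is in vector-length units and must be a multiple of the register count (illustrative; `assm` assumed):

    assm.st3w(z0.VnS(), z1.VnS(), z2.VnS(), p0, SVEMemOperand(x0, 3, SVE_MUL_VL));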
+ void st3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-doubleword structures from three vectors. + void st3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-byte structures from four vectors. + void st4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-halfword structures from four vectors. + void st4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-word structures from four vectors. + void st4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-doubleword structures from four vectors. + void st4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal bytes from vector. + void stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal halfwords from vector. + void stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal words from vector. + void stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal doublewords from vector. + void stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Store SVE predicate/vector register. + void str(const CPURegister& rt, const SVEMemOperand& addr); + + // Subtract vectors (predicated). + void sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Subtract vectors (unpredicated). + void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract immediate (unpredicated). + void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Reversed subtract vectors (predicated). + void subr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Reversed subtract from immediate (unpredicated). + void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Signed unpack and extend half of vector. + void sunpkhi(const ZRegister& zd, const ZRegister& zn); + + // Signed unpack and extend half of vector. + void sunpklo(const ZRegister& zd, const ZRegister& zn); + + // Signed byte extend (predicated). + void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed halfword extend (predicated). + void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed word extend (predicated). + void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Programmable table lookup/permute using vector of indices into a + // vector. + void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave even or odd elements from two predicates. 
+ void trn1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave even or odd elements from two vectors. + void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave even or odd elements from two predicates. + void trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave even or odd elements from two vectors. + void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference (predicated). + void uabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned add reduction to scalar. + void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); + + // Unsigned integer convert to floating-point (predicated). + void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned divide (predicated). + void udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned reversed divide (predicated). + void udivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned dot product by indexed quadtuplet. + void udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned dot product. + void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned maximum vectors (predicated). + void umax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned maximum with immediate (unpredicated). + void umax(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Unsigned maximum reduction to scalar. + void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Unsigned minimum vectors (predicated). + void umin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned minimum with immediate (unpredicated). + void umin(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Unsigned minimum reduction to scalar. + void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Unsigned multiply returning high half (predicated). + void umulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating add vectors (unpredicated). + void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating add immediate (unpredicated). + void uqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Unsigned saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. + void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 64-bit predicate + // constraint element count. + void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. 
+ void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 16-bit predicate + // constraint element count. + void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by active predicate element count. + void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Unsigned saturating decrement vector by active predicate element count. + void uqdecp(const ZRegister& zdn, const PRegister& pg); + + // Unsigned saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 32-bit predicate + // constraint element count. + void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 64-bit predicate + // constraint element count. + void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 16-bit predicate + // constraint element count. + void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 16-bit predicate + // constraint element count. + void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by active predicate element count. + void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Unsigned saturating increment vector by active predicate element count. + void uqincp(const ZRegister& zdn, const PRegister& pg); + + // Unsigned saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 32-bit predicate + // constraint element count. + void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating subtract vectors (unpredicated). + void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating subtract immediate (unpredicated). + void uqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Unsigned unpack and extend half of vector. + void uunpkhi(const ZRegister& zd, const ZRegister& zn); + + // Unsigned unpack and extend half of vector. + void uunpklo(const ZRegister& zd, const ZRegister& zn); + + // Unsigned byte extend (predicated). + void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned halfword extend (predicated). + void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned word extend (predicated). + void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Concatenate even or odd elements from two predicates. 
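A permute sketch for the concatenate/interleave pairs declared below (illustrative; `assm` assumed): uzp1/uzp2 pick the even/odd-numbered elements of the zn:zm pair, while zip1/zip2 interleave its low/high halves.

    assm.uzp1(z0.VnS(), z1.VnS(), z2.VnS());  // Even-numbered elements of z1:z2.
    assm.uzp2(z3.VnS(), z1.VnS(), z2.VnS());  // Odd-numbered elements of z1:z2.
    assm.zip1(z4.VnS(), z1.VnS(), z2.VnS());  // Interleave the low halves.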
+ void uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Concatenate even or odd elements from two vectors. + void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Concatenate even or odd elements from two predicates. + void uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Concatenate even or odd elements from two vectors. + void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // While incrementing signed scalar less than or equal to scalar. + void whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing unsigned scalar lower than scalar. + void whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing unsigned scalar lower or same as scalar. + void whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing signed scalar less than scalar. + void whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // Write the first-fault register. + void wrffr(const PRegisterWithLaneSize& pn); + + // Interleave elements from two half predicates. + void zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave elements from two half vectors. + void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave elements from two half predicates. + void zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave elements from two half vectors. + void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + // Emit generic instructions. // Emit raw instructions into the instruction stream. @@ -3650,20 +5852,20 @@ class Assembler : public vixl::internal::AssemblerBase { // Code generation helpers. // Register encoding. 
- static Instr Rd(CPURegister rd) { - VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode); - return rd.GetCode() << Rd_offset; + template <int hibit, int lobit> + static Instr Rx(CPURegister rx) { + VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode); + return ImmUnsignedField<hibit, lobit>(rx.GetCode()); } - static Instr Rn(CPURegister rn) { - VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode); - return rn.GetCode() << Rn_offset; - } - - static Instr Rm(CPURegister rm) { - VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); - return rm.GetCode() << Rm_offset; +#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s) +#define REGISTER_ENCODER(N) \ + static Instr R##N(CPURegister r##N) { \ + return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \ } + CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER) +#undef REGISTER_ENCODER +#undef CPU_REGISTER_FIELD_NAMES static Instr RmNot31(CPURegister rm) { VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode); @@ -3671,26 +5873,6 @@ class Assembler : public vixl::internal::AssemblerBase { return Rm(rm); } - static Instr Ra(CPURegister ra) { - VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode); - return ra.GetCode() << Ra_offset; - } - - static Instr Rt(CPURegister rt) { - VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode); - return rt.GetCode() << Rt_offset; - } - - static Instr Rt2(CPURegister rt2) { - VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode); - return rt2.GetCode() << Rt2_offset; - } - - static Instr Rs(CPURegister rs) { - VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode); - return rs.GetCode() << Rs_offset; - } - // These encoding functions allow the stack pointer to be encoded, and // disallow the zero register. static Instr RdSP(Register rd) { @@ -3708,6 +5890,33 @@ class Assembler : public vixl::internal::AssemblerBase { return (rm.GetCode() & kRegCodeMask) << Rm_offset; } + static Instr Pd(PRegister pd) { + return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd); + } + + static Instr Pm(PRegister pm) { + return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm); + } + + static Instr Pn(PRegister pn) { + return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn); + } + + static Instr PgLow8(PRegister pg) { + // Governing predicates can be merging, zeroing, or unqualified. They should + // never have a lane size. + VIXL_ASSERT(!pg.HasLaneSize()); + return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg); + } + + template <int hibit, int lobit> + static Instr Pg(PRegister pg) { + // Governing predicates can be merging, zeroing, or unqualified. They should + // never have a lane size. + VIXL_ASSERT(!pg.HasLaneSize()); + return Rx<hibit, lobit>(pg); + } + // Flags encoding. + static Instr Flags(FlagsUpdate S) { + if (S == SetFlags) { @@ -3721,6 +5930,26 @@ class Assembler : public vixl::internal::AssemblerBase { static Instr Cond(Condition cond) { return cond << Condition_offset; } + // Generic immediate encoding. + template <int hibit, int lobit> + static Instr ImmField(int64_t imm) { + VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); + VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); + int fieldsize = hibit - lobit + 1; + VIXL_ASSERT(IsIntN(fieldsize, imm)); + return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit); + } + + // For unsigned immediate encoding. + // TODO: Handle signed and unsigned immediate in a satisfactory way. 
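For reference, each REGISTER_ENCODER instantiation above expands to a thin wrapper over Rx, which in turn range-checks the register code through the ImmUnsignedField helper that follows; the expansion for the d field is mechanical:

    static Instr Rd(CPURegister rd) {
      return Rx<Rd_offset + Rd_width - 1, Rd_offset>(rd);
    }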
+ template <int hibit, int lobit> + static Instr ImmUnsignedField(uint64_t imm) { + VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); + VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); + VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm)); + return static_cast<Instr>(imm << lobit); + } + // PC-relative address encoding. static Instr ImmPCRelAddress(int64_t imm21) { VIXL_ASSERT(IsInt21(imm21)); @@ -3771,11 +6000,60 @@ class Assembler : public vixl::internal::AssemblerBase { if (IsUint12(imm)) { // No shift required. imm <<= ImmAddSub_offset; } else { - imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset); + imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset); } return imm; } + static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) { + VIXL_ASSERT(IsUint6(imms)); + VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3)); + USE(lane_size); + return imms << SVEImmSetBits_offset; + } + + static Instr SVEImmRotate(unsigned immr, unsigned lane_size) { + VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr)); + USE(lane_size); + return immr << SVEImmRotate_offset; + } + + static Instr SVEBitN(unsigned bitn) { + VIXL_ASSERT(IsUint1(bitn)); + return bitn << SVEBitN_offset; + } + + static Instr SVEDtype(unsigned msize_in_bytes_log2, + unsigned esize_in_bytes_log2, + bool is_signed, + int dtype_h_lsb = 23, + int dtype_l_lsb = 21) { + VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2); + VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2); + Instr dtype_h = msize_in_bytes_log2; + Instr dtype_l = esize_in_bytes_log2; + // Signed forms use the encodings where msize would be greater than esize. + if (is_signed) { + dtype_h = dtype_h ^ 0x3; + dtype_l = dtype_l ^ 0x3; + } + VIXL_ASSERT(IsUint2(dtype_h)); + VIXL_ASSERT(IsUint2(dtype_l)); + VIXL_ASSERT((dtype_h > dtype_l) == is_signed); + + return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb); + } + + static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2, + unsigned esize_in_bytes_log2, + bool is_signed) { + return SVEDtype(msize_in_bytes_log2, + esize_in_bytes_log2, + is_signed, + 23, + 13); + } + static Instr ImmS(unsigned imms, unsigned reg_size) { VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) || ((reg_size == kWRegSize) && IsUint5(imms))); @@ -3856,9 +6134,9 @@ class Assembler : public vixl::internal::AssemblerBase { return TruncateToUint9(imm9) << ImmLS_offset; } - static Instr ImmLSPair(int64_t imm7, unsigned access_size) { - VIXL_ASSERT(IsMultiple(imm7, 1 << access_size)); - int64_t scaled_imm7 = imm7 / (1 << access_size); + static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) { + VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2)); + int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2); VIXL_ASSERT(IsInt7(scaled_imm7)); return TruncateToUint7(scaled_imm7) << ImmLSPair_offset; } @@ -3990,8 +6268,8 @@ class Assembler : public vixl::internal::AssemblerBase { unsigned* n = NULL, unsigned* imm_s = NULL, unsigned* imm_r = NULL); - static bool IsImmLSPair(int64_t offset, unsigned access_size); - static bool IsImmLSScaled(int64_t offset, unsigned access_size); + static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2); + static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2); static bool IsImmLSUnscaled(int64_t offset); static bool IsImmMovn(uint64_t imm, unsigned reg_size); static bool IsImmMovz(uint64_t imm, unsigned reg_size); @@ -4126,6 +6404,30 @@ class Assembler : public 
vixl::internal::AssemblerBase { } } + template <typename T> + static Instr SVESize(const T& rd) { + VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister()); + VIXL_ASSERT(rd.HasLaneSize()); + switch (rd.GetLaneSizeInBytes()) { + case 1: + return SVE_B; + case 2: + return SVE_H; + case 4: + return SVE_S; + case 8: + return SVE_D; + default: + return 0xffffffff; + } + } + + static Instr ImmSVEPredicateConstraint(int pattern) { + VIXL_ASSERT(IsUint5(pattern)); + return (pattern << ImmSVEPredicateConstraint_offset) & + ImmSVEPredicateConstraint_mask; + } + static Instr ImmNEONHLM(int index, int num_bits) { int h, l, m; if (num_bits == 3) { @@ -4277,9 +6579,93 @@ class Assembler : public vixl::internal::AssemblerBase { const MemOperand& addr, Instr op); + // Set `is_load` to false by default, as it's only used in the + // scalar-plus-vector form. + Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2, + int num_regs, + const SVEMemOperand& addr, + bool is_load = false); + + // E.g. st1b, st1h, ... + // This supports both contiguous and scatter stores. + void SVESt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // E.g. ld1b, ld1h, ... + // This supports both contiguous and gather loads. + void SVELd1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // E.g. ld1rb, ld1rh, ... + void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // E.g. ldff1b, ldff1h, ... + // This supports both contiguous and gather loads. + void SVELdff1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // Common code for the helpers above. + void SVELdSt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_signed, + Instr op); + + // Common code for the helpers above. + void SVEScatterGatherHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_load, + bool is_signed, + bool is_first_fault); + + // E.g. st2b, st3h, ... + void SVESt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr); + + // E.g. ld2b, ld3h, ... + void SVELd234Helper(int num_regs, + const ZRegister& zt1, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Common code for the helpers above. + void SVELdSt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr, + Instr op); + + // E.g. ld1qb, ld1qh, ldnt1b, ... + void SVELd1St1ScaImmHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + Instr regoffset_op, + Instr immoffset_op, + int imm_divisor = 1); + void Prefetch(PrefetchOperation op, const MemOperand& addr, LoadStoreScalingOption option = PreferScaledOffset); + void Prefetch(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); // TODO(all): The third parameter should be passed by reference but gcc 4.8.2 // reports a bogus uninitialised warning then. 
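A note on the SVESize helper added above: it reduces to placing log2(lane size in bytes) in the two-bit size field at bits <23:22>, the field that the encoding comments later in this patch write as size<23:22>. A minimal standalone sketch of that mapping, with assumed names not taken from the patch (the real code returns the SVE_B/SVE_H/SVE_S/SVE_D constants instead):

    #include <cassert>
    #include <cstdint>

    // Illustrative sketch of Assembler::SVESize: log2(lane size in bytes)
    // shifted into the assumed size-field position, bits <23:22>.
    uint32_t SveSizeFieldSketch(unsigned lane_size_in_bytes) {
      const unsigned kSveSizeOffset = 22;  // assumed field position
      switch (lane_size_in_bytes) {
        case 1: return 0u << kSveSizeOffset;  // .B lanes
        case 2: return 1u << kSveSizeOffset;  // .H lanes
        case 4: return 2u << kSveSizeOffset;  // .S lanes
        case 8: return 3u << kSveSizeOffset;  // .D lanes
        default: assert(false); return 0xffffffff;
      }
    }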
@@ -4287,6 +6673,9 @@ class Assembler : public vixl::internal::AssemblerBase { const Register& rn, const Operand operand, LogicalOp op); + + void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op); + void LogicalImmediate(const Register& rd, const Register& rn, unsigned n, @@ -4306,6 +6695,92 @@ class Assembler : public vixl::internal::AssemblerBase { FlagsUpdate S, AddSubWithCarryOp op); + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEIntCompareVectorsOp op); + + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm, + SVEIntCompareSignedImmOp op); + + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm, + SVEIntCompareUnsignedImmOp op); + + void SVEIntAddSubtractImmUnpredicatedHelper( + SVEIntAddSubtractImm_UnpredicatedOp op, + const ZRegister& zd, + int imm8, + int shift); + + void SVEElementCountToRegisterHelper(Instr op, + const Register& rd, + int pattern, + int multiplier); + + Instr EncodeSVEShiftImmediate(Shift shift_op, + int shift, + int lane_size_in_bits); + + void SVEBitwiseShiftImmediate(const ZRegister& zd, + const ZRegister& zn, + Instr encoded_imm, + SVEBitwiseShiftUnpredicatedOp op); + + void SVEBitwiseShiftImmediatePred(const ZRegister& zdn, + const PRegisterM& pg, + Instr encoded_imm, + SVEBitwiseShiftByImm_PredicatedOp op); + + Instr SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d); + + + void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEPrefetchHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) { + // SVE only supports PLD and PST, not PLI. + VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) || + ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM))); + // Check that we can simply map bits. + VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000); + VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000); + // Remaining operations map directly. + return ((prfop & 0b10000) >> 1) | (prfop & 0b00111); + } // Functions for emulating operands not directly supported by the instruction // set. @@ -4507,12 +6982,16 @@ class Assembler : public vixl::internal::AssemblerBase { NEONShiftImmediateOp op); void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop); + // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8) + // and *shift is either 0 or 8. Otherwise, leave the values unchanged. + void ResolveSVEImm8Shift(int* imm8, int* shift); + Instr LoadStoreStructAddrModeField(const MemOperand& addr); // Encode the specified MemOperand for the specified access size and scaling // preference. 
Instr LoadStoreMemOperand(const MemOperand& addr, - unsigned access_size, + unsigned access_size_in_bytes_log2, LoadStoreScalingOption option); // Link the current (not-yet-emitted) instruction to the specified label, then diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc new file mode 100644 index 00000000..f7cf8b21 --- /dev/null +++ b/src/aarch64/assembler-sve-aarch64.cc @@ -0,0 +1,6489 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +void Assembler::ResolveSVEImm8Shift(int* imm8, int* shift) { + if (*shift < 0) { + VIXL_ASSERT(*shift == -1); + // Derive the shift amount from the immediate. + if (IsInt8(*imm8)) { + *shift = 0; + } else if ((*imm8 % 256) == 0) { + *imm8 /= 256; + *shift = 8; + } + } + + VIXL_ASSERT(IsInt8(*imm8)); + VIXL_ASSERT((*shift == 0) || (*shift == 8)); +} + +// SVEAddressGeneration. + +void Assembler::adr(const ZRegister& zd, const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsVectorPlusVector()); + VIXL_ASSERT( + AreSameLaneSize(zd, addr.GetVectorBase(), addr.GetVectorOffset())); + + int lane_size = zd.GetLaneSizeInBits(); + VIXL_ASSERT((lane_size == kSRegSize) || (lane_size == kDRegSize)); + + int shift_amount = addr.GetShiftAmount(); + VIXL_ASSERT((shift_amount >= 0) && (shift_amount <= 3)); + + Instr op = 0xffffffff; + Instr msz = shift_amount << 10; + SVEOffsetModifier mod = addr.GetOffsetModifier(); + switch (mod) { + case SVE_UXTW: + VIXL_ASSERT(lane_size == kDRegSize); + op = ADR_z_az_d_u32_scaled; + break; + case SVE_SXTW: + VIXL_ASSERT(lane_size == kDRegSize); + op = ADR_z_az_d_s32_scaled; + break; + case SVE_LSL: + case NO_SVE_OFFSET_MODIFIER: + op = (lane_size == kSRegSize) ? 
ADR_z_az_s_same_scaled + : ADR_z_az_d_same_scaled; + break; + default: + VIXL_UNIMPLEMENTED(); + } + Emit(op | msz | Rd(zd) | Rn(addr.GetVectorBase()) | + Rm(addr.GetVectorOffset())); +} + +void Assembler::SVELogicalImmediate(const ZRegister& zdn, + uint64_t imm, + Instr op) { + unsigned bit_n, imm_s, imm_r; + unsigned lane_size = zdn.GetLaneSizeInBits(); + // Check that the immediate can be encoded in the instruction. + if (IsImmLogical(imm, lane_size, &bit_n, &imm_s, &imm_r)) { + Emit(op | Rd(zdn) | SVEBitN(bit_n) | SVEImmRotate(imm_r, lane_size) | + SVEImmSetBits(imm_s, lane_size)); + } else { + VIXL_UNREACHABLE(); + } +} + +void Assembler::and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, AND_z_zi); +} + +void Assembler::dupm(const ZRegister& zd, uint64_t imm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + // DUPM_z_i is an SVEBroadcastBitmaskImmOp, but its encoding and constraints + // are similar enough to SVEBitwiseLogicalWithImm_UnpredicatedOp that we can + // use the logical immediate encoder to get the correct behaviour. + SVELogicalImmediate(zd, imm, DUPM_z_i); +} + +void Assembler::eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, EOR_z_zi); +} + +void Assembler::orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, ORR_z_zi); +} + +// SVEBitwiseLogicalUnpredicated. +void Assembler::and_(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(AND_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bic(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(BIC_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::eor(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(EOR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::orr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(ORR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEBitwiseShiftPredicated. + +void Assembler::SVEBitwiseShiftImmediatePred( + const ZRegister& zdn, + const PRegisterM& pg, + Instr encoded_imm_and_tsz, + SVEBitwiseShiftByImm_PredicatedOp op) { + Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) + << 5; + Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; + Emit(op | tszh | tszl_and_imm | PgLow8(pg) | Rd(zdn)); +} + +void Assembler::asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0000 100. .... .... .... 
+ // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 0 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASR_z_p_zi); +} + +void Assembler::asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D + // 0000 0100 ..01 1000 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = ASR_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = ASR_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // ASRD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0100 100. .... .... .... + // tszh<23:22> | opc<19:18> = 01 | L<17> = 0 | U<16> = 0 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASRD_z_p_zi); +} + +void Assembler::asrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0100 100. .... .... .... + // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ASRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0011 100. .... .... .... + // tszh<23:22> | opc<19:18> = 00 | L<17> = 1 | U<16> = 1 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSL_z_p_zi); +} + +void Assembler::lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D + // 0000 0100 ..01 1011 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = LSL_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = LSL_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lslr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0111 100. .... .... .... 
+ // size<23:22> | R<18> = 1 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(LSLR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const> + // 0000 0100 ..00 0001 100. .... .... .... + // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 1 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSR_z_p_zi); +} + +void Assembler::lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D + // 0000 0100 ..01 1001 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = LSR_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = LSR_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0101 100. .... .... .... + // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(LSRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEBitwiseShiftUnpredicated. 
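A note on the shift-immediate scheme used by the encoder that follows: the lane size and the shift amount are folded into a single tsz:imm3 value. LSL #n encodes as lane_size + n (0 <= n < lane_size), while ASR and LSR #n encode as 2 * lane_size - n (1 <= n <= lane_size), so the position of the highest set bit recovers the lane size on decode. A small illustrative sketch of the computation, not part of the patch:

    // Sketch of the combined tsz:imm3 value produced by
    // Assembler::EncodeSVEShiftImmediate below.
    int EncodeSveShiftImmSketch(bool is_lsl, int shift, int lane_size_in_bits) {
      if (is_lsl) {
        return lane_size_in_bits + shift;      // LSL: 0 <= shift < lane size
      }
      return (2 * lane_size_in_bits) - shift;  // ASR/LSR: 1 <= shift <= lane size
    }
    // e.g. for .B lanes (8 bits): LSL #3 -> 11 (0b01011), LSR #8 -> 8 (0b01000).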
+ +Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op, + int shift, + int lane_size_in_bits) { + if (shift_op == LSL) { + VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); + return lane_size_in_bits + shift; + } + + VIXL_ASSERT((shift_op == ASR) || (shift_op == LSR)); + VIXL_ASSERT((shift > 0) && (shift <= lane_size_in_bits)); + return (2 * lane_size_in_bits) - shift; +} + +void Assembler::SVEBitwiseShiftImmediate(const ZRegister& zd, + const ZRegister& zn, + Instr encoded_imm_and_tsz, + SVEBitwiseShiftUnpredicatedOp op) { + Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) + << 16; + Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; + Emit(op | tszh | tszl_and_imm | Rd(zd) | Rn(zn)); +} + +void Assembler::asr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + Instr encoded_imm = + EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, ASR_z_zi); +} + +void Assembler::asr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(ASR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::lsl(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr encoded_imm = + EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSL_z_zi); +} + +void Assembler::lsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(LSL_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::lsr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr encoded_imm = + EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSR_z_zi); +} + +void Assembler::lsr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(LSR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEElementCount. + +#define VIXL_SVE_INC_DEC_LIST(V) \ + V(cntb, CNTB_r_s) \ + V(cnth, CNTH_r_s) \ + V(cntw, CNTW_r_s) \ + V(cntd, CNTD_r_s) \ + V(decb, DECB_r_rs) \ + V(dech, DECH_r_rs) \ + V(decw, DECW_r_rs) \ + V(decd, DECD_r_rs) \ + V(incb, INCB_r_rs) \ + V(inch, INCH_r_rs) \ + V(incw, INCW_r_rs) \ + V(incd, INCD_r_rs) \ + V(sqdecb, SQDECB_r_rs_x) \ + V(sqdech, SQDECH_r_rs_x) \ + V(sqdecw, SQDECW_r_rs_x) \ + V(sqdecd, SQDECD_r_rs_x) \ + V(sqincb, SQINCB_r_rs_x) \ + V(sqinch, SQINCH_r_rs_x) \ + V(sqincw, SQINCW_r_rs_x) \ + V(sqincd, SQINCD_r_rs_x) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(rdn.IsX()); \ + Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_UQINC_UQDEC_LIST(V) \ + V(uqdecb, (rdn.IsX() ? 
UQDECB_r_rs_x : UQDECB_r_rs_uw)) \ + V(uqdech, (rdn.IsX() ? UQDECH_r_rs_x : UQDECH_r_rs_uw)) \ + V(uqdecw, (rdn.IsX() ? UQDECW_r_rs_x : UQDECW_r_rs_uw)) \ + V(uqdecd, (rdn.IsX() ? UQDECD_r_rs_x : UQDECD_r_rs_uw)) \ + V(uqincb, (rdn.IsX() ? UQINCB_r_rs_x : UQINCB_r_rs_uw)) \ + V(uqinch, (rdn.IsX() ? UQINCH_r_rs_x : UQINCH_r_rs_uw)) \ + V(uqincw, (rdn.IsX() ? UQINCW_r_rs_x : UQINCW_r_rs_uw)) \ + V(uqincd, (rdn.IsX() ? UQINCD_r_rs_x : UQINCD_r_rs_uw)) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_UQINC_UQDEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_SQX_INC_DEC_LIST(V) \ + V(sqdecb, SQDECB) \ + V(sqdech, SQDECH) \ + V(sqdecw, SQDECW) \ + V(sqdecd, SQDECD) \ + V(sqincb, SQINCB) \ + V(sqinch, SQINCH) \ + V(sqincw, SQINCW) \ + V(sqincd, SQINCD) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& xd, \ + const Register& wn, \ + int pattern, \ + int multiplier) { \ + USE(wn); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(wn.IsW() && xd.Is(wn.X())); \ + Emit(OP##_r_rs_sx | Rd(xd) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_SQX_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_INC_DEC_VEC_LIST(V) \ + V(dech, DEC, H) \ + V(decw, DEC, W) \ + V(decd, DEC, D) \ + V(inch, INC, H) \ + V(incw, INC, W) \ + V(incd, INC, D) \ + V(sqdech, SQDEC, H) \ + V(sqdecw, SQDEC, W) \ + V(sqdecd, SQDEC, D) \ + V(sqinch, SQINC, H) \ + V(sqincw, SQINC, W) \ + V(sqincd, SQINC, D) \ + V(uqdech, UQDEC, H) \ + V(uqdecw, UQDEC, W) \ + V(uqdecd, UQDEC, D) \ + V(uqinch, UQINC, H) \ + V(uqincw, UQINC, W) \ + V(uqincd, UQINC, D) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, T) \ + void Assembler::FN(const ZRegister& zdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(zdn.GetLaneSizeInBytes() == k##T##RegSizeInBytes); \ + Emit(OP##T##_z_zs | Rd(zdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_INC_DEC_VEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// SVEFPAccumulatingReduction. + +void Assembler::fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> + // 0110 0101 ..01 1000 001. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zm.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zm, vd)); + + Emit(FADDA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +// SVEFPArithmetic_Predicated. + +void Assembler::fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1000 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 1000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1000 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FADD_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0000 100. .... .... .... + // size<23:22> | opc<19:16> = 0000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1101 100. .... .... .... + // size<23:22> | opc<19:16> = 1101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1100 100. .... .... .... + // size<23:22> | opc<19:16> = 1100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1110 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMAX_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0110 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0110 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1100 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMAXNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAXNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0100 100. .... .... .... + // size<23:22> | opc<19:16> = 0100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAXNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1111 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMIN_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0111 100. .... .... .... + // size<23:22> | opc<19:16> = 0111 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1101 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMINNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMINNM <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0101 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMINNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1010 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 010 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 2.0)); + + Instr i1 = (imm == 2.0) ? (1 << 5) : 0; + Emit(FMUL_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0010 100. .... .... .... + // size<23:22> | opc<19:16> = 0010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMULX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1010 100. .... .... .... + // size<23:22> | opc<19:16> = 1010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMULX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSCALE <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 1001 100. .... .... .... + // size<23:22> | opc<19:16> = 1001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSCALE_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1001 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FSUB_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0001 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <const> + // 0110 0101 ..01 1011 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FSUBR_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0110 0101 ..00 0011 100. .... .... .... + // size<23:22> | opc<19:16> = 0011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3) { + // FTMAD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<imm> + // 0110 0101 ..01 0... 1000 00.. .... .... + // size<23:22> | imm3<18:16> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTMAD_z_zzi | SVESize(zd) | Rd(zd) | Rn(zm) | + ImmUnsignedField<18, 16>(imm3)); +} + +// SVEFPArithmeticUnpredicated. + +void Assembler::fadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::frecps(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FRECPS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0001 10.. .... .... 
+ // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::frsqrts(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FRSQRTS <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0001 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRSQRTS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::fsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ftsmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FTSMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 0000 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTSMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEFPCompareVectors. + +void Assembler::facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 110. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FACGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 111. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FACGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 011. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMEQ_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. 
.... 010. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 010. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 011. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMNE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..0. .... 110. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMUO_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +// SVEFPCompareWithZero. + +void Assembler::fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0010 001. .... ...0 .... + // size<23:22> | eq<17> = 1 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMEQ_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0000 001. .... ...0 .... + // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMGE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0000 001. .... ...1 .... 
+ // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMGT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0001 001. .... ...1 .... + // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMLE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0001 001. .... ...0 .... + // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMLT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #0.0 + // 0110 0101 ..01 0011 001. .... ...0 .... + // size<23:22> | eq<17> = 1 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMNE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPComplexAddition. + +void Assembler::fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // FCADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>, <const> + // 0110 0100 ..00 000. 100. .... .... .... + // size<23:22> | rot<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((rot == 90) || (rot == 270)); + + Instr rotate_bit = (rot == 90) ? 0 : (1 << 16); + Emit(FCADD_z_p_zz | rotate_bit | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEFPComplexMulAdd. + +void Assembler::fcmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // FCMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T>, <const> + // 0110 0100 ..0. .... 0... .... .... .... + // size<23:22> | Zm<20:16> | rot<14:13> | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 13; + Emit(FCMLA_z_p_zzz | rotate_bit | SVESize(zda) | Rd(zda) | PgLow8(pg) | + Rn(zn) | Rm(zm)); +} + +// SVEFPComplexMulAddIndex. 
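The indexed FCMLA form that follows must pack both the multiplicand register and the lane index into the five bits <20:16>, which is why the register number is restricted: z0-z7 with a two-bit index for H lanes, z0-z15 with a one-bit index for S lanes. An illustrative sketch of the H-lane packing, mirroring the (index << 19) | Rx<18, 16>(zm) expression below (not part of the patch):

    #include <cstdint>

    // Sketch: pack a restricted Zm (z0-z7) and a lane index (0-3) into bits
    // <20:16> for the half-precision indexed form: i2<20:19> : Zm<18:16>.
    uint32_t PackZmAndIndexSketch(uint32_t zm_code, uint32_t index) {
      return (index << 19) | (zm_code << 16);
    }
    // e.g. zm = z5, index = 2 -> bits <20:16> = 0b10101.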
+ +void Assembler::fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VIXL_ASSERT(index >= 0); + + int lane_size = zda.GetLaneSizeInBytes(); + + Instr zm_and_idx = 0; + Instr op = FCMLA_z_zzzi_h; + if (lane_size == kHRegSizeInBytes) { + // Zm<18:16> | i2<20:19> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3)); + zm_and_idx = (index << 19) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i1<20> + VIXL_ASSERT(lane_size == kSRegSizeInBytes); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1)); + zm_and_idx = (index << 20) | Rx<19, 16>(zm); + op = FCMLA_z_zzzi_s; + } + + Instr rotate_bit = (rot / 90) << 10; + Emit(op | zm_and_idx | rotate_bit | Rd(zda) | Rn(zn)); +} + +// SVEFPFastReduction. + +void Assembler::faddv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FADDV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0000 001. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FADDV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxnmv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMAXNMV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0100 001. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMAXNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMAXV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0110 001. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMAXV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fminnmv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMINNMV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0101 001. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMINNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMINV <V><d>, <Pg>, <Zn>.<T> + // 0110 0101 ..00 0111 001. .... .... .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMINV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPMulAdd. + +void Assembler::fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. 
.... 100. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 00 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 000. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 00 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 001. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 01 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. .... 101. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 01 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fnmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FNMAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. .... 110. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 10 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fnmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FNMLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 010. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 10 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fnmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FNMLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0110 0101 ..1. .... 011. .... .... .... 
+ // size<23:22> | Zm<20:16> | opc<14:13> = 11 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fnmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FNMSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0110 0101 ..1. .... 111. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 11 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d) { + Instr size = lane_size_in_bytes_log2 << SVESize_offset; + Instr zm_with_index = Rm(zm); + Instr op = 0xffffffff; + // Allowable register number and lane index depend on the lane size. + switch (lane_size_in_bytes_log2) { + case kHRegSizeInBytesLog2: + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint3(index)); + // For H-sized lanes, size is encoded as 0b0x, where x is used as the top + // bit of the index. So, if index is less than four, the top bit of index + // is zero, and therefore size is 0b00. Otherwise, it's 0b01, the usual + // encoding for H-sized lanes. + if (index < 4) size = 0; + // Top two bits of "zm" encode the index. + zm_with_index |= (index & 3) << (Rm_offset + 3); + op = op_h; + break; + case kSRegSizeInBytesLog2: + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint2(index)); + // Top two bits of "zm" encode the index. + zm_with_index |= (index & 3) << (Rm_offset + 3); + op = op_s; + break; + case kDRegSizeInBytesLog2: + VIXL_ASSERT(zm.GetCode() <= 15); + VIXL_ASSERT(IsUint1(index)); + // Top bit of "zm" encodes the index. + zm_with_index |= (index & 1) << (Rm_offset + 4); + op = op_d; + break; + default: + VIXL_UNIMPLEMENTED(); + } + return op | zm_with_index | size; +} + +// SVEFPMulAddIndex. + +void Assembler::fmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + // The encodings of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLA_z_zzzi_h, + FMLA_z_zzzi_s, + FMLA_z_zzzi_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +void Assembler::fmls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + // The encodings of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLS_z_zzzi_h, + FMLS_z_zzzi_s, + FMLS_z_zzzi_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +// SVEFPMulIndex. + +// This prototype maps to 3 instruction encodings: +void Assembler::fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index) { + // FMUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>[<imm>] + // 0110 0100 ..1. .... 0010 00.. .... .... 
+ // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEFPMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + FMUL_z_zzi_h, + FMUL_z_zzi_s, + FMUL_z_zzi_d); + + Emit(synthesized_op | Rd(zd) | Rn(zn)); +} + +// SVEFPUnaryOpPredicated. + +void Assembler::fcvt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVT_z_p_z_h2s; + break; + case kDRegSizeInBytes: + op = FCVT_z_p_z_h2d; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVT_z_p_z_s2h; + break; + case kDRegSizeInBytes: + op = FCVT_z_p_z_s2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVT_z_p_z_d2h; + break; + case kSRegSizeInBytes: + op = FCVT_z_p_z_d2s; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtzs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVTZS_z_p_z_fp162h; + break; + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_fp162w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_fp162x; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_s2w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_s2x; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_d2w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_d2x; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtzu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVTZU_z_p_z_fp162h; + break; + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_fp162w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_fp162x; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_s2w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_s2x; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_d2w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_d2x; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frecpx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FRECPX <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0110 0101 ..00 1100 101. .... .... .... 
+ // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frinta(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTA_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frinti(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTI_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTM_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintn(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTN_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTP_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintz(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fsqrt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FSQRT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0110 0101 ..00 1101 101. .... .... .... 
+ // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSQRT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::scvtf(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_h2fp16; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_w2fp16; + break; + case kSRegSizeInBytes: + op = SCVTF_z_p_z_w2s; + break; + case kDRegSizeInBytes: + op = SCVTF_z_p_z_w2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_x2fp16; + break; + case kSRegSizeInBytes: + op = SCVTF_z_p_z_x2s; + break; + case kDRegSizeInBytes: + op = SCVTF_z_p_z_x2d; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::ucvtf(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_h2fp16; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_w2fp16; + break; + case kSRegSizeInBytes: + op = UCVTF_z_p_z_w2s; + break; + case kDRegSizeInBytes: + op = UCVTF_z_p_z_w2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_x2fp16; + break; + case kSRegSizeInBytes: + op = UCVTF_z_p_z_x2s; + break; + case kDRegSizeInBytes: + op = UCVTF_z_p_z_x2d; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPUnaryOpUnpredicated. + +void Assembler::frecpe(const ZRegister& zd, const ZRegister& zn) { + // FRECPE <Zd>.<T>, <Zn>.<T> + // 0110 0101 ..00 1110 0011 00.. .... .... + // size<23:22> | opc<18:16> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::frsqrte(const ZRegister& zd, const ZRegister& zn) { + // FRSQRTE <Zd>.<T>, <Zn>.<T> + // 0110 0101 ..00 1111 0011 00.. .... .... + // size<23:22> | opc<18:16> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRSQRTE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +// SVEIncDecByPredicateCount. + +void Assembler::decp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // DECP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1101 1000 100. .... .... + // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rdn.IsX()); + + Emit(DECP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::decp(const ZRegister& zdn, const PRegister& pg) { + // DECP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1101 1000 000. .... .... 
+ // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | + // Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(DECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::incp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // INCP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1100 1000 100. .... .... + // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rdn.IsX()); + + Emit(INCP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::incp(const ZRegister& zdn, const PRegister& pg) { + // INCP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1100 1000 000. .... .... + // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | + // Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(INCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::sqdecp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) { + // SQDECP <Xdn>, <Pg>.<T>, <Wdn> + // 0010 0101 ..10 1010 1000 100. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + USE(wn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); + + Emit(SQDECP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); +} + +void Assembler::sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) { + // SQDECP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1010 1000 110. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xdn.IsX()); + + Emit(SQDECP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); +} + +void Assembler::sqdecp(const ZRegister& zdn, const PRegister& pg) { + // SQDECP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1010 1000 000. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(SQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::sqincp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) { + // SQINCP <Xdn>, <Pg>.<T>, <Wdn> + // 0010 0101 ..10 1000 1000 100. .... .... + // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + USE(wn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); + + Emit(SQINCP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); +} + +void Assembler::sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) { + // SQINCP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1000 1000 110. .... .... + // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xdn.IsX()); + + Emit(SQINCP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); +} + +void Assembler::sqincp(const ZRegister& zdn, const PRegister& pg) { + // SQINCP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1000 1000 000. .... .... 
+ // size<23:22> | D<17> = 0 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(SQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // UQDECP <Wdn>, <Pg>.<T> + // UQDECP <Xdn>, <Pg>.<T> + // 0010 0101 ..10 1011 1000 10.. .... .... + // size<23:22> | D<17> = 1 | U<16> = 1 | sf<10> | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = rdn.IsX() ? UQDECP_r_p_r_x : UQDECP_r_p_r_uw; + Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::uqdecp(const ZRegister& zdn, const PRegister& pg) { + // UQDECP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1011 1000 000. .... .... + // size<23:22> | D<17> = 1 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(UQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // UQINCP <Wdn>, <Pg>.<T> + // 0010 0101 ..10 1001 1000 100. .... .... + // size<23:22> | D<17> = 0 | U<16> = 1 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = rdn.IsX() ? UQINCP_r_p_r_x : UQINCP_r_p_r_uw; + Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::uqincp(const ZRegister& zdn, const PRegister& pg) { + // UQINCP <Zdn>.<T>, <Pg> + // 0010 0101 ..10 1001 1000 000. .... .... + // size<23:22> | D<17> = 0 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(UQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +// SVEIndexGeneration. + +void Assembler::index(const ZRegister& zd, int start, int step) { + // INDEX <Zd>.<T>, #<imm1>, #<imm2> + // 0000 0100 ..1. .... 0100 00.. .... .... + // size<23:22> | step<20:16> | start<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(INDEX_z_ii | SVESize(zd) | ImmField<20, 16>(step) | + ImmField<9, 5>(start) | Rd(zd)); +} + +void Assembler::index(const ZRegister& zd, + const Register& rn, + const Register& rm) { + // INDEX <Zd>.<T>, <R><n>, <R><m> + // 0000 0100 ..1. .... 0100 11.. .... .... + // size<23:22> | Rm<20:16> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_rr | SVESize(zd) | Rd(zd) | Rn(rn) | Rm(rm)); +} + +void Assembler::index(const ZRegister& zd, const Register& rn, int imm5) { + // INDEX <Zd>.<T>, <R><n>, #<imm> + // 0000 0100 ..1. .... 0100 01.. .... .... + // size<23:22> | imm5<20:16> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_ri | SVESize(zd) | Rd(zd) | Rn(rn) | ImmField<20, 16>(imm5)); +} + +void Assembler::index(const ZRegister& zd, int imm5, const Register& rm) { + // INDEX <Zd>.<T>, #<imm>, <R><m> + // 0000 0100 ..1. .... 0100 10.. .... .... 
+ // size<23:22> | Rm<20:16> | imm5<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rm.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_ir | SVESize(zd) | Rd(zd) | ImmField<9, 5>(imm5) | Rm(rm)); +} + +// SVEIntArithmeticUnpredicated. + +void Assembler::add(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0000 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0000 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UQADD <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UQSUB <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 0001 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEIntBinaryArithmeticPredicated. + +void Assembler::add(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 0000 000. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(ADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::and_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1010 000. .... .... .... 
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(AND_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1011 000. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(BIC_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::eor(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1001 000. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(EOR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::mul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0000 000. .... .... .... + // size<23:22> | H<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(MUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::orr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 1000 000. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(ORR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1100 000. .... .... .... + // size<23:22> | opc<18:17> = 10 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0100 000. .... .... .... + // size<23:22> | R<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(SDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0110 000. .... .... .... 
+ // size<23:22> | R<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(SDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1000 000. .... .... .... + // size<23:22> | opc<18:17> = 00 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1010 000. .... .... .... + // size<23:22> | opc<18:17> = 01 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0010 000. .... .... .... + // size<23:22> | H<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 0001 000. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::subr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 0011 000. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1101 000. .... .... .... + // size<23:22> | opc<18:17> = 10 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0101 000. .... .... .... 
+ // size<23:22> | R<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(UDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::udivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0111 000. .... .... .... + // size<23:22> | R<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(UDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1001 000. .... .... .... + // size<23:22> | opc<18:17> = 00 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..00 1011 000. .... .... .... + // size<23:22> | opc<18:17> = 01 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // 0000 0100 ..01 0011 000. .... .... .... + // size<23:22> | H<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEIntCompareScalars. + +void Assembler::ctermeq(const Register& rn, const Register& rm) { + // CTERMEQ <R><n>, <R><m> + // 0010 0101 1.1. .... 0010 00.. ...0 0000 + // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 0 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; + + Emit(CTERMEQ_rr | sz | Rn(rn) | Rm(rm)); +} + +void Assembler::ctermne(const Register& rn, const Register& rm) { + // CTERMNE <R><n>, <R><m> + // 0010 0101 1.1. .... 0010 00.. ...1 0000 + // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 1 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; + + Emit(CTERMNE_rr | sz | Rn(rn) | Rm(rm)); +} + +void Assembler::whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELE <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 01.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 
0x00001000 : 0x00000000; + + Emit(WHILELE_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELO <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 11.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELO_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELS <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 11.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELS_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELT <Pd>.<T>, <R><n>, <R><m> + // 0010 0101 ..1. .... 000. 01.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELT_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEIntCompareVectorsOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm, + SVEIntCompareSignedImmOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm, + SVEIntCompareUnsignedImmOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | + ImmUnsignedField<20, 14>(imm)); +} + +void Assembler::cmp(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + switch (cond) { + case eq: + cmpeq(pd, pg, zn, zm); + break; + case ge: + cmpge(pd, pg, zn, zm); + break; + case gt: + cmpgt(pd, pg, zn, zm); + break; + case le: + cmple(pd, pg, zn, zm); + break; + case lt: + cmplt(pd, pg, zn, zm); + break; + case ne: + cmpne(pd, pg, zn, zm); + break; + case hi: + cmphi(pd, pg, zn, zm); + break; + case hs: + cmphs(pd, pg, zn, zm); + break; + case lo: + cmplo(pd, pg, zn, zm); + break; + case ls: + cmpls(pd, pg, zn, zm); + break; + default: + VIXL_UNREACHABLE(); + } +} + +// SVEIntCompareSignedImm. + +void Assembler::cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 100. .... ...0 .... 
+ // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPEQ_p_p_zi); +} + +void Assembler::cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 000. .... ...0 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPGE_p_p_zi); +} + +void Assembler::cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 000. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPGT_p_p_zi); +} + +void Assembler::cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 001. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPLE_p_p_zi); +} + +void Assembler::cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 001. .... ...0 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPLT_p_p_zi); +} + +void Assembler::cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0101 ..0. .... 100. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPNE_p_p_zi); +} + +// SVEIntCompareUnsignedImm. + +void Assembler::cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..0. .... ...1 .... + // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPHI_p_p_zi); +} + +void Assembler::cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..0. .... ...0 .... 
+ // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPHS_p_p_zi); +} + +void Assembler::cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..1. .... ...0 .... + // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPLO_p_p_zi); +} + +void Assembler::cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> + // 0010 0100 ..1. .... ..1. .... ...1 .... + // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPLS_p_p_zi); +} + +// SVEIntCompareVectors. + +// This prototype maps to 2 instruction encodings: +// CMPEQ_p_p_zw +// CMPEQ_p_p_zz +void Assembler::cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPEQ_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPEQ_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPGE_p_p_zw +// CMPGE_p_p_zz +void Assembler::cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPGE_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPGE_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPGT_p_p_zw +// CMPGT_p_p_zz +void Assembler::cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPGT_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPGT_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPHI_p_p_zw +// CMPHI_p_p_zz +void Assembler::cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPHI_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPHI_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPHS_p_p_zw +// CMPHS_p_p_zz +void Assembler::cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPHS_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPHS_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +void 
Assembler::cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmpge(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLE_p_p_zw); +} + +void Assembler::cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmphi(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLO_p_p_zw); +} + +void Assembler::cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmphs(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLS_p_p_zw); +} + +void Assembler::cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmpgt(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLT_p_p_zw); +} + +// This prototype maps to 2 instruction encodings: +// CMPNE_p_p_zw +// CMPNE_p_p_zz +void Assembler::cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPNE_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPNE_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// SVEIntMiscUnpredicated. + +void Assembler::fexpa(const ZRegister& zd, const ZRegister& zn) { + // FEXPA <Zd>.<T>, <Zn>.<T> + // 0000 0100 ..10 0000 1011 10.. .... .... + // size<23:22> | opc<20:16> = 00000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FEXPA_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::ftssel(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FTSSEL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..1. .... 1011 00.. .... .... + // size<23:22> | Zm<20:16> | op<10> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTSSEL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::movprfx(const ZRegister& zd, const ZRegister& zn) { + // MOVPRFX <Zd>, <Zn> + // 0000 0100 0010 0000 1011 11.. .... .... + // opc<23:22> = 00 | opc2<20:16> = 00000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(MOVPRFX_z_z | Rd(zd) | Rn(zn)); +} + +// SVEIntMulAddPredicated. + +void Assembler::mad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // MAD <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0000 0100 ..0. .... 110. .... .... .... 
+ // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Za<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + + Emit(MAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); +} + +void Assembler::mla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MLA <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..0. .... 010. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(MLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::mls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MLS <Zda>.<T>, <Pg>/M, <Zn>.<T>, <Zm>.<T> + // 0000 0100 ..0. .... 011. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(MLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::msb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // MSB <Zdn>.<T>, <Pg>/M, <Zm>.<T>, <Za>.<T> + // 0000 0100 ..0. .... 111. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Za<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + + Emit(MSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); +} + +// SVEIntMulAddUnpredicated. + +void Assembler::sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zm, zn)); + + Emit(SDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zm, zn)); + + Emit(UDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// SVEIntReduction. + +void Assembler::andv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(ANDV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::eorv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(EORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::movprfx(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn) { + // MOVPRFX <Zd>.<T>, <Pg>/<ZM>, <Zn>.<T> + // 0000 0100 ..01 000. 001. .... .... .... + // size<23:22> | opc<18:17> = 00 | M<16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(!pg.HasLaneSize()); + + Instr m = pg.IsMerging() ? 
0x00010000 : 0x00000000; + Emit(MOVPRFX_z_p_z | SVESize(zd) | m | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::orv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(ORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::saddv(const VRegister& dd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(SADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::smaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(SMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(SMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uaddv(const VRegister& dd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(UADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::umaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(UMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(UMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +// SVEIntUnaryArithmeticPredicated. + +void Assembler::abs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // ABS <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0110 101. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(ABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1000 101. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CLS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::clz(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1001 101. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CLZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cnot(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1011 101. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CNOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cnt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CNT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1010 101. .... .... .... 
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CNT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fabs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FABS <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1100 101. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fneg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FNEG <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1101 101. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::neg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // NEG <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0111 101. .... .... .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(NEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::not_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // NOT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 1110 101. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(NOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxtb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0000 101. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); + + Emit(SXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxth(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0010 101. .... .... .... + // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); + + Emit(SXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxtw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTW <Zd>.D, <Pg>/M, <Zn>.D + // 0000 0100 ..01 0100 101. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); + + Emit(SXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxtb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTB <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0001 101. .... .... .... 
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); + + Emit(UXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxth(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTH <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0100 ..01 0011 101. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); + + Emit(UXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxtw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTW <Zd>.D, <Pg>/M, <Zn>.D + // 0000 0100 ..01 0101 101. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); + + Emit(UXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +// SVEIntWideImmPredicated. + +void Assembler::cpy(const ZRegister& zd, + const PRegister& pg, + int imm8, + int shift) { + // CPY <Zd>.<T>, <Pg>/<ZM>, #<imm>{, <shift>} + // 0000 0101 ..01 .... 0... .... .... .... + // size<23:22> | Pg<19:16> | M<14> | sh<13> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + + ResolveSVEImm8Shift(&imm8, &shift); + + Instr sh = (shift > 0) ? (1 << 13) : 0; + Instr m = pg.IsMerging() ? (1 << 14) : 0; + Emit(CPY_z_p_i | m | sh | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | + ImmField<12, 5>(imm8)); +} + +void Assembler::fcpy(const ZRegister& zd, const PRegisterM& pg, double imm) { + // FCPY <Zd>.<T>, <Pg>/M, #<const> + // 0000 0101 ..01 .... 110. .... .... .... + // size<23:22> | Pg<19:16> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Instr imm_field = ImmUnsignedField<12, 5>(FP64ToImm8(imm)); + Emit(FCPY_z_p_i | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | imm_field); +} + +// SVEIntAddSubtractImmUnpredicated. + +void Assembler::SVEIntAddSubtractImmUnpredicatedHelper( + SVEIntAddSubtractImm_UnpredicatedOp op, + const ZRegister& zd, + int imm8, + int shift) { + if (shift < 0) { + VIXL_ASSERT(shift == -1); + // Derive the shift amount from the immediate. + if (IsUint8(imm8)) { + shift = 0; + } else if (IsUint16(imm8) && ((imm8 % 256) == 0)) { + imm8 /= 256; + shift = 8; + } + } + + VIXL_ASSERT(IsUint8(imm8)); + VIXL_ASSERT((shift == 0) || (shift == 8)); + + Instr shift_bit = (shift > 0) ? (1 << 13) : 0; + Emit(op | SVESize(zd) | Rd(zd) | shift_bit | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::add(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // ADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0000 11.. .... .... .... + // size<23:22> | opc<18:16> = 000 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(ADD_z_zi, zd, imm8, shift); +} + +void Assembler::dup(const ZRegister& zd, int imm8, int shift) { + // DUP <Zd>.<T>, #<imm>{, <shift>} + // 0010 0101 ..11 1000 11.. .... .... .... 
+ // size<23:22> | opc<18:17> = 00 | sh<13> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + ResolveSVEImm8Shift(&imm8, &shift); + VIXL_ASSERT((shift < 8) || !zd.IsLaneSizeB()); + + Instr shift_bit = (shift > 0) ? (1 << 13) : 0; + Emit(DUP_z_i | SVESize(zd) | Rd(zd) | shift_bit | ImmField<12, 5>(imm8)); +} + +void Assembler::fdup(const ZRegister& zd, double imm) { + // FDUP <Zd>.<T>, #<const> + // 0010 0101 ..11 1001 110. .... .... .... + // size<23:22> | opc<18:17> = 00 | o2<13> = 0 | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Instr encoded_imm = FP64ToImm8(imm) << 5; + Emit(FDUP_z_i | SVESize(zd) | encoded_imm | Rd(zd)); +} + +void Assembler::mul(const ZRegister& zd, const ZRegister& zn, int imm8) { + // MUL <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..11 0000 110. .... .... .... + // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(MUL_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::smax(const ZRegister& zd, const ZRegister& zn, int imm8) { + // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1000 110. .... .... .... + // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(SMAX_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::smin(const ZRegister& zd, const ZRegister& zn, int imm8) { + // SMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1010 110. .... .... .... + // size<23:22> | opc<18:16> = 010 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(SMIN_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::sqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0100 11.. .... .... .... + // size<23:22> | opc<18:16> = 100 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SQADD_z_zi, zd, imm8, shift); +} + +void Assembler::sqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0110 11.. .... .... .... + // size<23:22> | opc<18:16> = 110 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SQSUB_z_zi, zd, imm8, shift); +} + +void Assembler::sub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0001 11.. .... .... .... 
+ // size<23:22> | opc<18:16> = 001 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SUB_z_zi, zd, imm8, shift); +} + +void Assembler::subr(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SUBR <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0011 11.. .... .... .... + // size<23:22> | opc<18:16> = 011 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SUBR_z_zi, zd, imm8, shift); +} + +void Assembler::umax(const ZRegister& zd, const ZRegister& zn, int imm8) { + // UMAX <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1001 110. .... .... .... + // size<23:22> | opc<18:16> = 001 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(UMAX_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::umin(const ZRegister& zd, const ZRegister& zn, int imm8) { + // UMIN <Zdn>.<T>, <Zdn>.<T>, #<imm> + // 0010 0101 ..10 1011 110. .... .... .... + // size<23:22> | opc<18:16> = 011 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(UMIN_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::uqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // UQADD <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0101 11.. .... .... .... + // size<23:22> | opc<18:16> = 101 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(UQADD_z_zi, zd, imm8, shift); +} + +void Assembler::uqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // UQSUB <Zdn>.<T>, <Zdn>.<T>, #<imm>{, <shift>} + // 0010 0101 ..10 0111 11.. .... .... .... + // size<23:22> | opc<18:16> = 111 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(UQSUB_z_zi, zd, imm8, shift); +} + +// SVEMemLoad. 
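// A usage-level sketch of the SVEMemOperand forms the helpers below accept.
// Illustrative only, assuming the usual vixl::aarch64 register aliases and
// the ld1b/ld1d entry points defined later in this file; `assm` is a
// hypothetical Assembler* with a valid buffer.
//
//   // Contiguous, scalar plus vector-length-scaled immediate: [x0, #2, MUL VL].
//   assm->ld1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2, SVE_MUL_VL));
//   // Contiguous, scalar plus scalar: [x0, x1]; Rm must not be xzr.
//   assm->ld1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));
//   // Gather, vector plus immediate: [z1.d, #8]; the offset must be a
//   // multiple of the 8-byte memory element size.
//   assm->ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), 8));
//   // Gather, scalar plus scaled 64-bit vector offsets: [x0, z1.d, LSL #3].
//   assm->ld1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, z1.VnD(), SVE_LSL, 3));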
+ +void Assembler::SVELdSt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_signed, + Instr op) { + VIXL_ASSERT(addr.IsContiguous()); + + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr); + Instr dtype = + SVEDtype(msize_in_bytes_log2, zt.GetLaneSizeInBytesLog2(), is_signed); + Emit(op | mem_op | dtype | Rt(zt) | PgLow8(pg)); +} + +void Assembler::SVELdSt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr, + Instr op) { + VIXL_ASSERT((num_regs >= 2) && (num_regs <= 4)); + + unsigned msize_in_bytes_log2 = zt1.GetLaneSizeInBytesLog2(); + Instr num = (num_regs - 1) << 21; + Instr msz = msize_in_bytes_log2 << 23; + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, num_regs, addr); + Emit(op | mem_op | msz | num | Rt(zt1) | PgLow8(pg)); +} + +void Assembler::SVELd1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + if (addr.IsScatterGather()) { + bool is_load = true; + bool is_ff = false; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + Instr op = 0xffffffff; + if (addr.IsScalarPlusImmediate()) { + op = SVEContiguousLoad_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + op = SVEContiguousLoad_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); +} + +void Assembler::SVELdff1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + if (addr.IsScatterGather()) { + bool is_load = true; + bool is_ff = true; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + if (addr.IsPlainScalar()) { + // SVEMemOperand(x0) is treated as a scalar-plus-immediate form ([x0, #0]). + // In these instructions, we want to treat it as [x0, xzr]. + SVEMemOperand addr_scalar_plus_scalar(addr.GetScalarBase(), xzr); + // Guard against infinite recursion. 
+ VIXL_ASSERT(!addr_scalar_plus_scalar.IsPlainScalar()); + SVELdff1Helper(msize_in_bytes_log2, + zt, + pg, + addr_scalar_plus_scalar, + is_signed); + return; + } + + Instr op = 0xffffffff; + if (addr.IsScalarPlusScalar()) { + op = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); +} + +void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_load, + bool is_signed, + bool is_first_fault) { + VIXL_ASSERT(addr.IsScatterGather()); + VIXL_ASSERT(zt.IsLaneSizeS() || zt.IsLaneSizeD()); + VIXL_ASSERT(is_load || !is_first_fault); + VIXL_ASSERT(is_load || !is_signed); + + Instr op = 0xffffffff; + if (addr.IsVectorPlusImmediate()) { + VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorBase())); + if (is_load) { + if (zt.IsLaneSizeS()) { + op = SVE32BitGatherLoad_VectorPlusImmFixed; + } else { + op = SVE64BitGatherLoad_VectorPlusImmFixed; + } + } else { + if (zt.IsLaneSizeS()) { + op = SVE32BitScatterStore_VectorPlusImmFixed; + } else { + op = SVE64BitScatterStore_VectorPlusImmFixed; + } + } + } else { + VIXL_ASSERT(addr.IsScalarPlusVector()); + VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorOffset())); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + if (zt.IsLaneSizeS()) { + VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW)); + unsigned shift_amount = addr.GetShiftAmount(); + if (shift_amount == 0) { + if (is_load) { + op = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed; + } + } else if (shift_amount == 1) { + VIXL_ASSERT(msize_in_bytes_log2 == kHRegSizeInBytesLog2); + if (is_load) { + op = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; + } + } else { + VIXL_ASSERT(shift_amount == 2); + VIXL_ASSERT(msize_in_bytes_log2 == kSRegSizeInBytesLog2); + if (is_load) { + op = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; + } + } + } else if (zt.IsLaneSizeD()) { + switch (mod) { + case NO_SVE_OFFSET_MODIFIER: + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed; + } else { + op = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed; + } + break; + case SVE_LSL: + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed; + } else { + op = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed; + } + break; + case SVE_UXTW: + case SVE_SXTW: { + unsigned shift_amount = addr.GetShiftAmount(); + if (shift_amount == 0) { + if (is_load) { + op = + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; + } else { + op = + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; + } + } else { + VIXL_ASSERT(shift_amount == msize_in_bytes_log2); + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed; + } else { + op = + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed; + } + } + break; + } + default: + VIXL_UNIMPLEMENTED(); + } + } + } + + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr, is_load); + Instr msz = ImmUnsignedField<24, 23>(msize_in_bytes_log2); + Instr u = (!is_load || is_signed) ? 0 : (1 << 14); + Instr ff = is_first_fault ? 
(1 << 13) : 0; + Emit(op | mem_op | msz | u | ff | Rt(zt) | PgLow8(pg)); +} + +void Assembler::SVELd234Helper(int num_regs, + const ZRegister& zt1, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVELoadMultipleStructures_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVELoadMultipleStructures_ScalarPlusScalarFixed; + } else { + // These instructions don't support any other addressing modes. + VIXL_ABORT(); + } + SVELdSt234Helper(num_regs, zt1, pg, addr, op); +} + +// SVEMemContiguousLoad. + +#define VIXL_DEFINE_LD1(MSZ, LANE_SIZE) \ + void Assembler::ld1##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ + } +#define VIXL_DEFINE_LD2(MSZ, LANE_SIZE) \ + void Assembler::ld2##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(2, zt1, pg, addr); \ + } +#define VIXL_DEFINE_LD3(MSZ, LANE_SIZE) \ + void Assembler::ld3##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(3, zt1, pg, addr); \ + } +#define VIXL_DEFINE_LD4(MSZ, LANE_SIZE) \ + void Assembler::ld4##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const ZRegister& zt4, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3, zt4); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(4, zt1, pg, addr); \ + } + +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD1) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD2) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD3) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD4) + +#define VIXL_DEFINE_LD1S(MSZ, LANE_SIZE) \ + void Assembler::ld1s##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ + } +VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LD1S) + +// SVEMem32BitGatherAndUnsizedContiguous. + +void Assembler::SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. 
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + int64_t imm = addr.GetImmediateOffset(); + int divisor = 1 << msize_in_bytes_log2; + VIXL_ASSERT(imm % divisor == 0); + Instr dtype = SVEDtypeSplit(msize_in_bytes_log2, + zt.GetLaneSizeInBytesLog2(), + is_signed); + + Emit(SVELoadAndBroadcastElementFixed | dtype | RnSP(addr.GetScalarBase()) | + ImmUnsignedField<21, 16>(imm / divisor) | Rt(zt) | PgLow8(pg)); +} + +// This prototype maps to 4 instruction encodings: +// LD1RB_z_p_bi_u16 +// LD1RB_z_p_bi_u32 +// LD1RB_z_p_bi_u64 +// LD1RB_z_p_bi_u8 +void Assembler::ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 3 instruction encodings: +// LD1RH_z_p_bi_u16 +// LD1RH_z_p_bi_u32 +// LD1RH_z_p_bi_u64 +void Assembler::ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 2 instruction encodings: +// LD1RW_z_p_bi_u32 +// LD1RW_z_p_bi_u64 +void Assembler::ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kSRegSizeInBytesLog2, zt, pg, addr, false); +} + +void Assembler::ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kDRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 3 instruction encodings: +// LD1RSB_z_p_bi_s16 +// LD1RSB_z_p_bi_s32 +// LD1RSB_z_p_bi_s64 +void Assembler::ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, true); +} + +// This prototype maps to 2 instruction encodings: +// LD1RSH_z_p_bi_s32 +// LD1RSH_z_p_bi_s64 +void Assembler::ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, true); +} + +void Assembler::ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kWRegSizeInBytesLog2, zt, pg, addr, true); +} + +void Assembler::ldr(const CPURegister& rt, const SVEMemOperand& addr) { + // LDR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}] + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + int64_t imm9 = addr.GetImmediateOffset(); + VIXL_ASSERT(IsInt9(imm9)); + Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10; + Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16; + + Instr op = LDR_z_bi; + if (rt.IsPRegister()) { + op = LDR_p_bi; + } + Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l); +} + +// SVEMem64BitGather. + +// This prototype maps to 3 instruction encodings: +// LDFF1B_z_p_bz_d_64_unscaled +// LDFF1B_z_p_bz_d_x32_unscaled +void Assembler::ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1B { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] + // 1100 0100 010. .... 111. .... .... .... 
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1B_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1B_z_p_ai_d +// LDFF1B_z_p_ai_s +void Assembler::ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1B { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 001. .... 111. .... .... .... + // msz<24:23> = 00 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1B_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 4 instruction encodings: +// LDFF1D_z_p_bz_d_64_scaled +// LDFF1D_z_p_bz_d_64_unscaled +// LDFF1D_z_p_bz_d_x32_scaled +// LDFF1D_z_p_bz_d_x32_unscaled +void Assembler::ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1D { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #3] + // 1100 0101 111. .... 111. .... .... .... + // msz<24:23> = 11 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1D_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +void Assembler::ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1D { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0101 101. .... 111. .... .... .... + // msz<24:23> = 11 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1D_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1H_z_p_bz_d_64_scaled +// LDFF1H_z_p_bz_d_64_unscaled +// LDFF1H_z_p_bz_d_x32_scaled +// LDFF1H_z_p_bz_d_x32_unscaled +void Assembler::ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1H { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] + // 1100 0100 111. .... 111. .... .... .... + // msz<24:23> = 01 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1H_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1H_z_p_ai_d +// LDFF1H_z_p_ai_s +void Assembler::ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1H { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 101. .... 111. .... .... .... + // msz<24:23> = 01 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1H_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 3 instruction encodings: +// LDFF1SB_z_p_bz_d_64_unscaled +// LDFF1SB_z_p_bz_d_x32_unscaled +void Assembler::ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] + // 1100 0100 010. .... 101. .... .... .... 
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SB_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1SB_z_p_ai_d +// LDFF1SB_z_p_ai_s +void Assembler::ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SB { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 001. .... 101. .... .... .... + // msz<24:23> = 00 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SB_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1SH_z_p_bz_d_64_scaled +// LDFF1SH_z_p_bz_d_64_unscaled +// LDFF1SH_z_p_bz_d_x32_scaled +// LDFF1SH_z_p_bz_d_x32_unscaled +void Assembler::ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] + // 1100 0100 111. .... 101. .... .... .... + // msz<24:23> = 01 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SH_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1SH_z_p_ai_d +// LDFF1SH_z_p_ai_s +void Assembler::ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SH { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0100 101. .... 101. .... .... .... + // msz<24:23> = 01 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SH_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 4 instruction encodings: +// LDFF1SW_z_p_bz_d_64_scaled +// LDFF1SW_z_p_bz_d_64_unscaled +// LDFF1SW_z_p_bz_d_x32_scaled +// LDFF1SW_z_p_bz_d_x32_unscaled +void Assembler::ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] + // 1100 0101 011. .... 101. .... .... .... + // msz<24:23> = 10 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SW_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +void Assembler::ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SW { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0101 001. .... 101. .... .... .... + // msz<24:23> = 10 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SW_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1W_z_p_bz_d_64_scaled +// LDFF1W_z_p_bz_d_64_unscaled +// LDFF1W_z_p_bz_d_x32_scaled +// LDFF1W_z_p_bz_d_x32_unscaled +void Assembler::ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1W { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] + // 1100 0101 011. .... 111. .... .... .... 
+ // msz<24:23> = 10 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1W_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1W_z_p_ai_d +// LDFF1W_z_p_ai_s +void Assembler::ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1W { <Zt>.D }, <Pg>/Z, [<Zn>.D{, #<imm>}] + // 1100 0101 001. .... 111. .... .... .... + // msz<24:23> = 10 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1W_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +void Assembler::SVEGatherPrefetchVectorPlusImmediateHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsVectorPlusImmediate()); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + + Instr op = 0xffffffff; + switch (prefetch_size) { + case kBRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_ai_s) + : static_cast<Instr>(PRFB_i_p_ai_d); + break; + case kHRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_ai_s) + : static_cast<Instr>(PRFH_i_p_ai_d); + break; + case kSRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFW_i_p_ai_s) + : static_cast<Instr>(PRFW_i_p_ai_d); + break; + case kDRegSize: + op = zn.IsLaneSizeS() ? static_cast<Instr>(PRFD_i_p_ai_s) + : static_cast<Instr>(PRFD_i_p_ai_d); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + int64_t imm5 = addr.GetImmediateOffset(); + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | Rn(zn) | + ImmUnsignedField<20, 16>(imm5)); +} + +void Assembler::SVEGatherPrefetchScalarPlusImmediateHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + int64_t imm6 = addr.GetImmediateOffset(); + + Instr op = 0xffffffff; + switch (prefetch_size) { + case kBRegSize: + op = PRFB_i_p_bi_s; + break; + case kHRegSize: + op = PRFH_i_p_bi_s; + break; + case kSRegSize: + op = PRFW_i_p_bi_s; + break; + case kDRegSize: + op = PRFD_i_p_bi_s; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | + RnSP(addr.GetScalarBase()) | ImmField<21, 16>(imm6)); +} + +void Assembler::SVEContiguousPrefetchScalarPlusScalarHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusScalar()); + Instr op = 0xffffffff; + + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER); + op = PRFB_i_p_br_s; + break; + case kHRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); + op = PRFH_i_p_br_s; + break; + case kSRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); + op = PRFW_i_p_br_s; + break; + case kDRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); + op = PRFD_i_p_br_s; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | + RnSP(addr.GetScalarBase()) | 
Rm(addr.GetScalarOffset())); +} + +void Assembler::SVEContiguousPrefetchScalarPlusVectorHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusVector()); + ZRegister zm = addr.GetVectorOffset(); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + + // All prefetch scalar-plus-vector addressing modes use a shift corresponding + // to the element size. + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kBRegSizeInBytesLog2); + break; + case kHRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); + break; + case kSRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); + break; + case kDRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Instr sx = 0; + Instr op = 0xffffffff; + if ((mod == NO_SVE_OFFSET_MODIFIER) || (mod == SVE_LSL)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + op = PRFB_i_p_bz_d_64_scaled; + break; + case kHRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFH_i_p_bz_d_64_scaled; + break; + case kSRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFW_i_p_bz_d_64_scaled; + break; + case kDRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFD_i_p_bz_d_64_scaled; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); + VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); + + switch (prefetch_size) { + case kBRegSize: + op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFB_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFB_i_p_bz_d_x32_scaled); + break; + case kHRegSize: + op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFH_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFH_i_p_bz_d_x32_scaled); + break; + case kSRegSize: + op = zm.IsLaneSizeS() ? static_cast<Instr>(PRFW_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFW_i_p_bz_d_x32_scaled); + break; + case kDRegSize: + op = zm.IsLaneSizeS() ? 
static_cast<Instr>(PRFD_i_p_bz_s_x32_scaled) + : static_cast<Instr>(PRFD_i_p_bz_d_x32_scaled); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (mod == SVE_SXTW) { + sx = 1 << 22; + } + } + + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | sx | + RnSP(addr.GetScalarBase()) | Rm(zm)); +} + +void Assembler::SVEPrefetchHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + if (addr.IsVectorPlusImmediate()) { + // For example: + // [z0.s, #0] + SVEGatherPrefetchVectorPlusImmediateHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusImmediate()) { + // For example: + // [x0, #42, mul vl] + SVEGatherPrefetchScalarPlusImmediateHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusVector()) { + // For example: + // [x0, z0.s, sxtw] + SVEContiguousPrefetchScalarPlusVectorHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusScalar()) { + // For example: + // [x0, x1] + SVEContiguousPrefetchScalarPlusScalarHelper(prfop, pg, addr, prefetch_size); + + } else { + VIXL_UNIMPLEMENTED(); + } +} + +void Assembler::prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kBRegSize); +} + +void Assembler::prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kDRegSize); +} + +void Assembler::prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kHRegSize); +} + +void Assembler::prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kSRegSize); +} + +void Assembler::SVELd1St1ScaImmHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + Instr regoffset_op, + Instr immoffset_op, + int imm_divisor) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsScalarPlusScalar() || addr.IsScalarPlusImmediate()); + + Instr op; + if (addr.IsScalarPlusScalar()) { + op = regoffset_op | Rm(addr.GetScalarOffset()); + } else { + int64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT(((imm % imm_divisor) == 0) && IsInt4(imm / imm_divisor)); + op = immoffset_op | ImmField<19, 16>(imm / imm_divisor); + } + Emit(op | Rt(zt) | PgLow8(pg) | RnSP(addr.GetScalarBase())); +} + +void Assembler::ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(0)); + VIXL_ASSERT(zt.IsLaneSizeB()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQB_z_p_br_contiguous, + LD1RQB_z_p_bi_u8, + 16); +} + +void Assembler::ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(3)); + VIXL_ASSERT(zt.IsLaneSizeD()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQD_z_p_br_contiguous, + LD1RQD_z_p_bi_u64, + 16); +} + +void Assembler::ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(1)); + VIXL_ASSERT(zt.IsLaneSizeH()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQH_z_p_br_contiguous, + LD1RQH_z_p_bi_u16, + 16); +} + +void Assembler::ld1rqw(const ZRegister& zt, + const 
PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(2)); + VIXL_ASSERT(zt.IsLaneSizeS()); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LD1RQW_z_p_br_contiguous, + LD1RQW_z_p_bi_u32, + 16); +} + +#define VIXL_DEFINE_LDFF1(MSZ, LANE_SIZE) \ + void Assembler::ldff1##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ + } +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LDFF1) + +#define VIXL_DEFINE_LDFF1S(MSZ, LANE_SIZE) \ + void Assembler::ldff1s##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ + } +VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LDFF1S) + +void Assembler::ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(0, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(3, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(1, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(0, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(1, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(2, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(2, + zt, + pg, + addr, 
+ /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1B_z_p_br_contiguous, + LDNT1B_z_p_bi_contiguous); +} + +void Assembler::ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1D_z_p_br_contiguous, + LDNT1D_z_p_bi_contiguous); +} + +void Assembler::ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1H_z_p_br_contiguous, + LDNT1H_z_p_bi_contiguous); +} + +void Assembler::ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1W_z_p_br_contiguous, + LDNT1W_z_p_bi_contiguous); +} + +Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2, + int num_regs, + const SVEMemOperand& addr, + bool is_load) { + VIXL_ASSERT((num_regs >= 1) && (num_regs <= 4)); + + Instr op = 0xfffffff; + if (addr.IsScalarPlusImmediate()) { + VIXL_ASSERT((addr.GetImmediateOffset() == 0) || addr.IsMulVl()); + int64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT((imm % num_regs) == 0); + op = RnSP(addr.GetScalarBase()) | ImmField<19, 16>(imm / num_regs); + + } else if (addr.IsScalarPlusScalar()) { + VIXL_ASSERT(addr.GetScalarOffset().IsZero() || + addr.IsEquivalentToLSL(msize_in_bytes_log2)); + op = RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset()); + + } else if (addr.IsVectorPlusImmediate()) { + ZRegister zn = addr.GetVectorBase(); + uint64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT(num_regs == 1); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + VIXL_ASSERT(IsMultiple(imm, (1 << msize_in_bytes_log2))); + op = Rn(zn) | ImmUnsignedField<20, 16>(imm >> msize_in_bytes_log2); + + } else if (addr.IsScalarPlusVector()) { + // We have to support several different addressing modes. Some instructions + // support a subset of these, but the SVEMemOperand encoding is consistent. + Register xn = addr.GetScalarBase(); + ZRegister zm = addr.GetVectorOffset(); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + Instr modifier_bit = 1 << (is_load ? 22 : 14); + Instr xs = (mod == SVE_SXTW) ? 
modifier_bit : 0; + VIXL_ASSERT(num_regs == 1); + + if (mod == SVE_LSL) { + // 64-bit scaled offset: [<Xn|SP>, <Zm>.D, LSL #<shift>] + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(addr.GetShiftAmount() == msize_in_bytes_log2); + } else if (mod == NO_SVE_OFFSET_MODIFIER) { + // 64-bit unscaled offset: [<Xn|SP>, <Zm>.D] + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(addr.GetShiftAmount() == 0); + } else { + // 32-bit scaled offset: [<Xn|SP>, <Zm>.S, <mod> #<shift>] + // 32-bit unscaled offset: [<Xn|SP>, <Zm>.S, <mod>] + // 32-bit unpacked scaled offset: [<Xn|SP>, <Zm>.D, <mod> #<shift>] + // 32-bit unpacked unscaled offset: [<Xn|SP>, <Zm>.D, <mod>] + VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); + VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); + VIXL_ASSERT((addr.GetShiftAmount() == 0) || + (addr.GetShiftAmount() == msize_in_bytes_log2)); + } + + // The form itself is encoded in the instruction opcode. + op = RnSP(xn) | Rm(zm) | xs; + } else { + VIXL_UNIMPLEMENTED(); + } + + return op; +} + +// SVEMemStore. + +void Assembler::SVESt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + if (addr.IsScatterGather()) { + bool is_load = false; + bool is_signed = false; + bool is_ff = false; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVEContiguousStore_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVEContiguousStore_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + op = 0xffffffff; + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, false, op); +} + +void Assembler::SVESt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVEStoreMultipleStructures_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVEStoreMultipleStructures_ScalarPlusScalarFixed; + } else { + // These instructions don't support any other addressing modes. 
+ VIXL_ABORT(); + } + SVELdSt234Helper(num_regs, zt1, pg, addr, op); +} + +#define VIXL_DEFINE_ST1(MSZ, LANE_SIZE) \ + void Assembler::st1##MSZ(const ZRegister& zt, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVESt1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr); \ + } +#define VIXL_DEFINE_ST2(MSZ, LANE_SIZE) \ + void Assembler::st2##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(2, zt1, pg, addr); \ + } +#define VIXL_DEFINE_ST3(MSZ, LANE_SIZE) \ + void Assembler::st3##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(3, zt1, pg, addr); \ + } +#define VIXL_DEFINE_ST4(MSZ, LANE_SIZE) \ + void Assembler::st4##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const ZRegister& zt4, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3, zt4); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(4, zt1, pg, addr); \ + } + +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST1) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST2) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST3) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST4) + +void Assembler::stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1B_z_p_br_contiguous, + STNT1B_z_p_bi_contiguous); +} + +void Assembler::stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1D_z_p_br_contiguous, + STNT1D_z_p_bi_contiguous); +} + +void Assembler::stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1H_z_p_br_contiguous, + STNT1H_z_p_bi_contiguous); +} + +void Assembler::stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2))); + SVELd1St1ScaImmHelper(zt, + pg, + addr, + STNT1W_z_p_br_contiguous, + STNT1W_z_p_bi_contiguous); +} + +void Assembler::str(const CPURegister& rt, const SVEMemOperand& addr) { + // STR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}] + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + 
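// A worked example of the offset encoding performed below, mirroring ldr()
// above (a sketch; the operand values are assumptions for illustration): for
// str(z0, SVEMemOperand(x0, -3, SVE_MUL_VL)), imm9 = -3 = 0b111111101, so
// imm9l = 0b101 (bits <2:0>, encoded at <12:10>) and imm9h = 0b111111
// (bits <8:3>, encoded at <21:16>).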
VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + int64_t imm9 = addr.GetImmediateOffset(); + VIXL_ASSERT(IsInt9(imm9)); + Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10; + Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16; + + Instr op = STR_z_bi; + if (rt.IsPRegister()) { + op = STR_p_bi; + } + Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l); +} + +// SVEMulIndex. + +void Assembler::sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + + Instr op = 0xffffffff; + switch (zda.GetLaneSizeInBits()) { + case kSRegSize: + VIXL_ASSERT(IsUint2(index)); + op = SDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); + break; + case kDRegSize: + VIXL_ASSERT(IsUint1(index)); + op = SDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op); +} + +void Assembler::udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + + Instr op = 0xffffffff; + switch (zda.GetLaneSizeInBits()) { + case kSRegSize: + VIXL_ASSERT(IsUint2(index)); + op = UDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); + break; + case kDRegSize: + VIXL_ASSERT(IsUint1(index)); + op = UDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op); +} + +// SVEPartitionBreak. + +void Assembler::brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Instr m = pg.IsMerging() ? 0x00000010 : 0x00000000; + Emit(BRKA_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); +} + +void Assembler::brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Emit(BRKAS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Instr m = pg.IsMerging() ? 
0x00000010 : 0x00000000; + Emit(BRKB_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); +} + +void Assembler::brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Emit(BRKBS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + USE(pm); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + VIXL_ASSERT(pd.Is(pm)); + + Emit(BRKN_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + USE(pm); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + VIXL_ASSERT(pd.Is(pm)); + + Emit(BRKNS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +// SVEPermutePredicate. + +void Assembler::punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // PUNPKHI <Pd>.H, <Pn>.B + // 0000 0101 0011 0001 0100 000. ...0 .... + // H<16> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeH()); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PUNPKHI_p_p | Pd(pd) | Pn(pn)); +} + +void Assembler::punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // PUNPKLO <Pd>.H, <Pn>.B + // 0000 0101 0011 0000 0100 000. ...0 .... + // H<16> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeH()); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PUNPKLO_p_p | Pd(pd) | Pn(pn)); +} + +void Assembler::rev(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // REV <Pd>.<T>, <Pn>.<T> + // 0000 0101 ..11 0100 0100 000. ...0 .... + // size<23:22> | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn)); + + Emit(REV_p_p | SVESize(pd) | Pd(pd) | Rx<8, 5>(pn)); +} + +void Assembler::trn1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0101 000. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(TRN1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // TRN2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0101 010. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(TRN2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // UZP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 100. ...0 .... 
+ // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(UZP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // UZP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 110. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(UZP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // ZIP1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 000. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(ZIP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // ZIP2 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> + // 0000 0101 ..10 .... 0100 010. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(ZIP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +// SVEPermuteVectorExtract. + +void Assembler::ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset) { + // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> + // 0000 0101 001. .... 000. .... .... .... + // imm8h<20:16> | imm8l<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(IsUint8(offset)); + + int imm8h = ExtractUnsignedBitfield32(7, 3, offset); + int imm8l = ExtractUnsignedBitfield32(2, 0, offset); + Emit(EXT_z_zi_des | Rd(zd) | Rn(zm) | ImmUnsignedField<20, 16>(imm8h) | + ImmUnsignedField<12, 10>(imm8l)); +} + +// SVEPermuteVectorInterleaving. + +void Assembler::trn1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TRN1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(TRN1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::trn2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TRN2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(TRN2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uzp1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UZP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 10.. .... .... 
+ // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UZP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uzp2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UZP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UZP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::zip1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ZIP1 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ZIP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::zip2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ZIP2 <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + // 0000 0101 ..1. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ZIP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEPermuteVectorPredicated. + +void Assembler::clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + // CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> + // 0000 0101 ..11 0000 101. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Rdn<4:0> + + USE(rn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rd.Is(rn)); + + Emit(CLASTA_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> + // 0000 0101 ..10 1010 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(vd, zm)); + + Emit(CLASTA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> + // 0000 0101 ..10 1000 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(CLASTA_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + // CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> + // 0000 0101 ..11 0001 101. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Rdn<4:0> + + USE(rn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rd.Is(rn)); + + Emit(CLASTB_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> + // 0000 0101 ..10 1011 100. .... .... .... 
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(vd, zm)); + + Emit(CLASTB_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> + // 0000 0101 ..10 1001 100. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(CLASTB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::compact(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn) { + // COMPACT <Zd>.<T>, <Pg>, <Zn>.<T> + // 0000 0101 1.10 0001 100. .... .... .... + // sz<22> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT((zd.GetLaneSizeInBits() == kSRegSize) || + (zd.GetLaneSizeInBits() == kDRegSize)); + + Instr sz = (zd.GetLaneSizeInBits() == kDRegSize) ? (1 << 22) : 0; + Emit(COMPACT_z_p_z | sz | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cpy(const ZRegister& zd, + const PRegisterM& pg, + const Register& rn) { + // CPY <Zd>.<T>, <Pg>/M, <R><n|SP> + // 0000 0101 ..10 1000 101. .... .... .... + // size<23:22> | Pg<12:10> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast<unsigned>(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(CPY_z_p_r | SVESize(zd) | Rd(zd) | PgLow8(pg) | RnSP(rn)); +} + +void Assembler::cpy(const ZRegister& zd, + const PRegisterM& pg, + const VRegister& vn) { + // CPY <Zd>.<T>, <Pg>/M, <V><n> + // 0000 0101 ..10 0000 100. .... .... .... + // size<23:22> | Pg<12:10> | Vn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(static_cast<unsigned>(vn.GetSizeInBits()) == + zd.GetLaneSizeInBits()); + + Emit(CPY_z_p_v | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(vn)); +} + +void Assembler::lasta(const Register& rd, + const PRegister& pg, + const ZRegister& zn) { + // LASTA <R><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0000 101. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LASTA_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lasta(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // LASTA <V><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0010 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(LASTA_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lastb(const Register& rd, + const PRegister& pg, + const ZRegister& zn) { + // LASTB <R><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0001 101. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LASTB_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lastb(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // LASTB <V><d>, <Pg>, <Zn>.<T> + // 0000 0101 ..10 0011 100. .... .... .... 
+ // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(LASTB_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::rbit(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // RBIT <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0101 ..10 0111 100. .... .... .... + // size<23:22> | opc<17:16> = 11 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(RBIT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVB <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0101 ..10 0100 100. .... .... .... + // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeH() || zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(REVB_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVH <Zd>.<T>, <Pg>/M, <Zn>.<T> + // 0000 0101 ..10 0101 100. .... .... .... + // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(REVH_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVW <Zd>.D, <Pg>/M, <Zn>.D + // 0000 0101 ..10 0110 100. .... .... .... + // size<23:22> | opc<17:16> = 10 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(REVW_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> + // 0000 0101 ..10 1100 100. .... .... .... + // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEPermuteVectorUnpredicated. + +void Assembler::dup(const ZRegister& zd, const Register& xn) { + // DUP <Zd>.<T>, <R><n|SP> + // 0000 0101 ..10 0000 0011 10.. .... .... + // size<23:22> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(DUP_z_r | SVESize(zd) | Rd(zd) | RnSP(xn)); +} + +void Assembler::dup(const ZRegister& zd, const ZRegister& zn, unsigned index) { + // DUP <Zd>.<T>, <Zn>.<T>[<imm>] + // 0000 0101 ..1. .... 0010 00.. .... .... + // imm2<23:22> | tsz<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn)); + VIXL_ASSERT((index * zd.GetLaneSizeInBits()) < 512); + int n = zd.GetLaneSizeInBytesLog2(); + unsigned imm_7 = (index << (n + 1)) | (1 << n); + VIXL_ASSERT(IsUint7(imm_7)); + unsigned imm_2 = ExtractUnsignedBitfield32(6, 5, imm_7); + unsigned tsz_5 = ExtractUnsignedBitfield32(4, 0, imm_7); + + Emit(DUP_z_zi | ImmUnsignedField<23, 22>(imm_2) | + ImmUnsignedField<20, 16>(tsz_5) | Rd(zd) | Rn(zn)); +} + +void Assembler::insr(const ZRegister& zdn, const Register& rm) { + // INSR <Zdn>.<T>, <R><m> + // 0000 0101 ..10 0100 0011 10.. .... 
.... + // size<23:22> | Rm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(INSR_z_r | SVESize(zdn) | Rd(zdn) | Rn(rm)); +} + +void Assembler::insr(const ZRegister& zdn, const VRegister& vm) { + // INSR <Zdn>.<T>, <V><m> + // 0000 0101 ..11 0100 0011 10.. .... .... + // size<23:22> | Vm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vm.IsScalar()); + + Emit(INSR_z_v | SVESize(zdn) | Rd(zdn) | Rn(vm)); +} + +void Assembler::rev(const ZRegister& zd, const ZRegister& zn) { + // REV <Zd>.<T>, <Zn>.<T> + // 0000 0101 ..11 1000 0011 10.. .... .... + // size<23:22> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn)); + + Emit(REV_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::sunpkhi(const ZRegister& zd, const ZRegister& zn) { + // SUNPKHI <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0001 0011 10.. .... .... + // size<23:22> | U<17> = 0 | H<16> = 1 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(SUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::sunpklo(const ZRegister& zd, const ZRegister& zn) { + // SUNPKLO <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0000 0011 10.. .... .... + // size<23:22> | U<17> = 0 | H<16> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(SUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::tbl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TBL <Zd>.<T>, { <Zn>.<T> }, <Zm>.<T> + // 0000 0101 ..1. .... 0011 00.. .... .... + // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(TBL_z_zz_1 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uunpkhi(const ZRegister& zd, const ZRegister& zn) { + // UUNPKHI <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0011 0011 10.. .... .... + // size<23:22> | U<17> = 1 | H<16> = 1 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(UUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::uunpklo(const ZRegister& zd, const ZRegister& zn) { + // UUNPKLO <Zd>.<T>, <Zn>.<Tb> + // 0000 0101 ..11 0010 0011 10.. .... .... + // size<23:22> | U<17> = 1 | H<16> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(UUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +// SVEPredicateCount. + +void Assembler::cntp(const Register& xd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // CNTP <Xd>, <Pg>, <Pn>.<T> + // 0010 0101 ..10 0000 10.. ..0. .... .... + // size<23:22> | opc<18:16> = 000 | Pg<13:10> | o2<9> = 0 | Pn<8:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(pg.IsUnqualified()); + if (pg.HasLaneSize()) VIXL_ASSERT(AreSameFormat(pg, pn)); + + Emit(CNTP_r_p_p | SVESize(pn) | Rd(xd) | Pg<13, 10>(pg) | Pn(pn)); +} + +// SVEPredicateLogicalOp. 
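The predicate logical group that follows shares one shape: a destination predicate, a zeroing governing predicate, and two byte-lane source predicates. As a minimal usage sketch of these encodings via the public VIXL AArch64 API (the GeneratePredicateAnd helper, its include path, and the assumption of an already-configured Assembler are illustrative, not part of this patch):

#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Sketch: combine two predicates under an all-true governing predicate,
// using the ptrue and and_ entry points defined in this patch.
void GeneratePredicateAnd(Assembler* masm) {
  masm->ptrue(p3.VnB(), SVE_ALL);  // Every byte lane of p3 is active.
  // For lanes where p3 is active, p2 = p0 & p1; inactive lanes are zeroed.
  masm->and_(p2.VnB(), p3.Zeroing(), p0.VnB(), p1.VnB());
}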
+void Assembler::and_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(AND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(BIC_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(BICS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(EOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(EORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NAND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + 
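// ORN of predicates: for lanes where the governing predicate pg is + // active, pd = pn | ~pm; inactive lanes of pd are zeroed. +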
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORN_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORNS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORRS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Emit(SEL_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +// SVEPredicateMisc. + +void Assembler::pfalse(const PRegisterWithLaneSize& pd) { + // PFALSE <Pd>.B + // 0010 0101 0001 1000 1110 0100 0000 .... + // op<23> = 0 | S<22> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + // Ignore the lane size, since it makes no difference to the operation. + + Emit(PFALSE_p | Pd(pd)); +} + +void Assembler::pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // PFIRST <Pdn>.B, <Pg>, <Pdn>.B + // 0010 0101 0101 1000 1100 000. ...0 .... + // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pdn<3:0> + + USE(pn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.Is(pn)); + VIXL_ASSERT(pd.IsLaneSizeB()); + + Emit(PFIRST_p_p_p | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // PNEXT <Pdn>.<T>, <Pg>, <Pdn>.<T> + // 0010 0101 ..01 1001 1100 010. ...0 .... + // size<23:22> | Pg<8:5> | Pdn<3:0> + + USE(pn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.Is(pn)); + + Emit(PNEXT_p_p_p | SVESize(pd) | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) { + // PTEST <Pg>, <Pn>.B + // 0010 0101 0101 0000 11.. ..0. ...0 0000 + // op<23> = 0 | S<22> = 1 | Pg<13:10> | Pn<8:5> | opc2<3:0> = 0000 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PTEST_p_p | Pg<13, 10>(pg) | Rx<8, 5>(pn)); +} + +void Assembler::ptrue(const PRegisterWithLaneSize& pd, int pattern) { + // PTRUE <Pd>.<T>{, <pattern>} + // 0010 0101 ..01 1000 1110 00.. ...0 .... + // size<23:22> | S<16> = 0 | pattern<9:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(PTRUE_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); +} + +void Assembler::ptrues(const PRegisterWithLaneSize& pd, int pattern) { + // PTRUES <Pd>.<T>{, <pattern>} + // 0010 0101 ..01 1001 1110 00.. ...0 .... 
+ // size<23:22> | S<16> = 1 | pattern<9:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(PTRUES_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); +} + +void Assembler::rdffr(const PRegisterWithLaneSize& pd) { + // RDFFR <Pd>.B + // 0010 0101 0001 1001 1111 0000 0000 .... + // op<23> = 0 | S<22> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFR_p_f | Pd(pd)); +} + +void Assembler::rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + // RDFFR <Pd>.B, <Pg>/Z + // 0010 0101 0001 1000 1111 000. ...0 .... + // op<23> = 0 | S<22> = 0 | Pg<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFR_p_p_f | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + // RDFFRS <Pd>.B, <Pg>/Z + // 0010 0101 0101 1000 1111 000. ...0 .... + // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFRS_p_p_f | Pd(pd) | Pg<8, 5>(pg)); +} + +// SVEPropagateBreak. + +void Assembler::brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPA <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0000 .... 11.. ..0. ...0 .... + // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPA_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPAS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0100 .... 11.. ..0. ...0 .... + // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPAS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPB <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0000 .... 11.. ..0. ...1 .... + // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPB_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPBS <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B + // 0010 0101 0100 .... 11.. ..0. ...1 .... + // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPBS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +// SVEStackFrameAdjustment. + +void Assembler::addpl(const Register& xd, const Register& xn, int imm6) { + // ADDPL <Xd|SP>, <Xn|SP>, #<imm> + // 0000 0100 011. .... 0101 0... .... .... + // op<22> = 1 | Rn<20:16> | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + Emit(ADDPL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); +} + +void Assembler::addvl(const Register& xd, const Register& xn, int imm6) { + // ADDVL <Xd|SP>, <Xn|SP>, #<imm> + // 0000 0100 001. .... 0101 0... .... .... 
+ // op<22> = 0 | Rn<20:16> | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + Emit(ADDVL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); +} + +// SVEStackFrameSize. + +void Assembler::rdvl(const Register& xd, int imm6) { + // RDVL <Xd>, #<imm> + // 0000 0100 1011 1111 0101 0... .... .... + // op<22> = 0 | opc2<20:16> = 11111 | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + + Emit(RDVL_r_i | Rd(xd) | ImmField<10, 5>(imm6)); +} + +// SVEVectorSelect. + +void Assembler::sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SEL_z_p_zz | SVESize(zd) | Rd(zd) | Pg<13, 10>(pg) | Rn(zn) | Rm(zm)); +} + +// SVEWriteFFR. + +void Assembler::setffr() { + // SETFFR + // 0010 0101 0010 1100 1001 0000 0000 0000 + // opc<23:22> = 00 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(SETFFR_f); +} + +void Assembler::wrffr(const PRegisterWithLaneSize& pn) { + // WRFFR <Pn>.B + // 0010 0101 0010 1000 1001 000. ...0 0000 + // opc<23:22> = 00 | Pn<8:5> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(WRFFR_f_p | Rx<8, 5>(pn)); +} + +// Aliases. + +void Assembler::bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + and_(zd, zn, ~imm); +} + +void Assembler::eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + eor(zd, zn, ~imm); +} + +void Assembler::orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + orr(zd, zn, ~imm); +} + + +void Assembler::fmov(const ZRegister& zd, const PRegisterM& pg, double imm) { + if (IsPositiveZero(imm)) { + cpy(zd, pg, 0); + } else { + fcpy(zd, pg, imm); + } +} + +void Assembler::fmov(const ZRegister& zd, double imm) { + if (IsPositiveZero(imm)) { + dup(zd, imm); + } else { + fdup(zd, imm); + } +} + +void Assembler::mov(const PRegister& pd, const PRegister& pn) { + // If the inputs carry a lane size, they must match. 
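+ // Unqualified predicates (with no lane size) are accepted too: the copy is + // lane-size agnostic, so it is emitted as a byte-sized ORR of pn with + // itself, governed by pn zeroing, which reproduces pn exactly.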
+ VIXL_ASSERT((!pd.HasLaneSize() && !pn.HasLaneSize()) || + AreSameLaneSize(pd, pn)); + orr(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); +} + +void Assembler::mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + sel(pd, pg, pn, pd); +} + +void Assembler::mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + and_(pd, pg, pn, pn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegister& pg, + int imm8, + int shift) { + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + cpy(zd, pg, imm8, shift); +} + +void Assembler::mov(const ZRegister& zd, const Register& xn) { dup(zd, xn); } + +void Assembler::mov(const ZRegister& zd, const VRegister& vn) { + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(zd, vn)); + dup(zd, vn.Z().WithSameLaneSizeAs(vn), 0); +} + +void Assembler::mov(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + orr(zd.VnD(), zn.VnD(), zn.VnD()); +} + +void Assembler::mov(const ZRegister& zd, const ZRegister& zn, unsigned index) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + dup(zd, zn, index); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const Register& rn) { + cpy(zd, pg, rn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const VRegister& vn) { + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(zd, vn)); + cpy(zd, pg, vn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + sel(zd, pg, zn, zd); +} + +void Assembler::mov(const ZRegister& zd, uint64_t imm) { + // Mov is an alias of dupm for certain values of imm. Whilst this matters in + // the disassembler, for the assembler, we don't distinguish between the + // two mnemonics, and simply call dupm. + dupm(zd, imm); +} + +void Assembler::mov(const ZRegister& zd, int imm8, int shift) { + dup(zd, imm8, shift); +} + +void Assembler::movs(const PRegister& pd, const PRegister& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + orrs(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); +} + +void Assembler::movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + ands(pd, pg, pn, pn); +} + +void Assembler::not_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + eor(pd, pg, pn, pg.VnB()); +} + +void Assembler::nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + eors(pd, pg, pn, pg.VnB()); +} + +} // namespace aarch64 +} // namespace vixl diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h index 36f5568d..bf93918d 100644 --- a/src/aarch64/constants-aarch64.h +++ b/src/aarch64/constants-aarch64.h @@ -34,6 +34,8 @@ namespace aarch64 { const unsigned kNumberOfRegisters = 32; const unsigned kNumberOfVRegisters = 32; +const unsigned kNumberOfZRegisters = kNumberOfVRegisters; +const unsigned kNumberOfPRegisters = 16; // Callee saved registers are x21-x30(lr). const int kNumberOfCalleeSavedRegisters = 10; const int kFirstCalleeSavedRegisterIndex = 21; @@ -41,14 +43,34 @@ const int kFirstCalleeSavedRegisterIndex = 21; // still caller-saved. 
const int kNumberOfCalleeSavedFPRegisters = 8; const int kFirstCalleeSavedFPRegisterIndex = 8; +// All predicated instructions accept at least p0-p7 as the governing predicate. +const unsigned kNumberOfGoverningPRegisters = 8; // clang-format off +#define AARCH64_P_REGISTER_CODE_LIST(R) \ + R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ + R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) + #define AARCH64_REGISTER_CODE_LIST(R) \ R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \ R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \ R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31) +// SVE loads and stores use "w" instead of "s" for word-sized accesses, so the +// mapping from the load/store variant to constants like k*RegSize is irregular. +#define VIXL_SVE_LOAD_STORE_VARIANT_LIST(V) \ + V(b, B) \ + V(h, H) \ + V(w, S) \ + V(d, D) + +// Sign-extending loads don't have double-word variants. +#define VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(V) \ + V(b, B) \ + V(h, H) \ + V(w, S) + #define INSTRUCTION_FIELDS_LIST(V_) \ /* Register fields */ \ V_(Rd, 4, 0, ExtractBits) /* Destination register. */ \ @@ -59,6 +81,11 @@ V_(Ra, 14, 10, ExtractBits) /* Third source register. */ \ V_(Rt, 4, 0, ExtractBits) /* Load/store register. */ \ V_(Rt2, 14, 10, ExtractBits) /* Load/store second register. */ \ V_(Rs, 20, 16, ExtractBits) /* Exclusive access status. */ \ +V_(Pt, 3, 0, ExtractBits) /* Load/store register (p0-p7). */ \ +V_(Pd, 3, 0, ExtractBits) /* SVE destination predicate register. */ \ +V_(Pn, 8, 5, ExtractBits) /* SVE first source predicate register. */ \ +V_(Pm, 19, 16, ExtractBits) /* SVE second source predicate register.*/ \ +V_(PgLow8, 12, 10, ExtractBits) /* Governing predicate (p0-p7). */ \ \ /* Common bits */ \ V_(SixtyFourBits, 31, 31, ExtractBits) \ @@ -74,7 +101,7 @@ V_(ImmDPShift, 15, 10, ExtractBits) \ \ /* Add/subtract immediate */ \ V_(ImmAddSub, 21, 10, ExtractBits) \ -V_(ShiftAddSub, 23, 22, ExtractBits) \ +V_(ImmAddSubShift, 22, 22, ExtractBits) \ \ /* Add/substract extend */ \ V_(ImmExtendShift, 12, 10, ExtractBits) \ @@ -177,7 +204,23 @@ V_(NEONCmode, 15, 12, ExtractBits) \ /* NEON Shift Immediate fields */ \ V_(ImmNEONImmhImmb, 22, 16, ExtractBits) \ V_(ImmNEONImmh, 22, 19, ExtractBits) \ -V_(ImmNEONImmb, 18, 16, ExtractBits) +V_(ImmNEONImmb, 18, 16, ExtractBits) \ + \ +/* SVE generic fields */ \ +V_(SVESize, 23, 22, ExtractBits) \ +V_(ImmSVEVLScale, 10, 5, ExtractSignedBits) \ +V_(ImmSVEIntWideSigned, 12, 5, ExtractSignedBits) \ +V_(ImmSVEIntWideUnsigned, 12, 5, ExtractBits) \ +V_(ImmSVEPredicateConstraint, 9, 5, ExtractBits) \ + \ +/* SVE Bitwise Immediate bitfield */ \ +V_(SVEBitN, 17, 17, ExtractBits) \ +V_(SVEImmRotate, 16, 11, ExtractBits) \ +V_(SVEImmSetBits, 10, 5, ExtractBits) \ + \ +V_(SVEImmPrefetchOperation, 3, 0, ExtractBits) \ +V_(SVEPrefetchHint, 3, 3, ExtractBits) + // clang-format on #define SYSTEM_REGISTER_FIELDS_LIST(V_, M_) \ @@ -235,7 +278,22 @@ enum Condition { // Aliases. hs = cs, // C set Unsigned higher or same. - lo = cc // C clear Unsigned lower. + lo = cc, // C clear Unsigned lower. + + // Floating-point additional condition code. + uo, // Unordered comparison. + + // SVE predicate condition aliases. + sve_none = eq, // No active elements were true. + sve_any = ne, // An active element was true. + sve_nlast = cs, // The last element was not true. + sve_last = cc, // The last element was true. + sve_first = mi, // The first element was true. + sve_nfrst = pl, // The first element was not true. 
+ sve_pmore = hi, // An active element was true but not the last element. + sve_plast = ls, // The last active element was true or no active elements were true. + sve_tcont = ge, // CTERM termination condition not detected. + sve_tstop = lt // CTERM termination condition detected. }; inline Condition InvertCondition(Condition cond) { @@ -279,7 +337,12 @@ enum StatusFlags { FPEqualFlag = ZCFlag, FPLessThanFlag = NFlag, FPGreaterThanFlag = CFlag, - FPUnorderedFlag = CVFlag + FPUnorderedFlag = CVFlag, + + // SVE condition flags. + SVEFirstFlag = NFlag, + SVENoneFlag = ZFlag, + SVENotLastFlag = CFlag }; enum Shift { @@ -303,6 +366,17 @@ enum Extend { SXTX = 7 }; +enum SVEOffsetModifier { + NO_SVE_OFFSET_MODIFIER, + // Multiply (each element of) the offset by either the vector or predicate + // length, according to the context. + SVE_MUL_VL, + // Shift or extend modifiers (as in `Shift` or `Extend`). + SVE_LSL, + SVE_UXTW, + SVE_SXTW +}; + enum SystemHint { NOP = 0, YIELD = 1, @@ -368,6 +442,12 @@ enum PrefetchOperation { PSTL3STRM = 0x15 }; +constexpr bool IsNamedPrefetchOperation(int op) { + return ((op >= PLDL1KEEP) && (op <= PLDL3STRM)) || + ((op >= PLIL1KEEP) && (op <= PLIL3STRM)) || + ((op >= PSTL1KEEP) && (op <= PSTL3STRM)); +} + enum BType { // Set when executing any instruction on a guarded page, except those cases // listed below. @@ -429,6 +509,36 @@ enum DataCacheOp { ZVA = CacheOpEncoder<3, 7, 4, 1>::value }; + +// Some SVE instructions support a predicate constraint pattern. This is +// interpreted as a VL-dependent value, and is typically used to initialise +// predicates, or to otherwise limit the number of processed elements. +enum SVEPredicateConstraint { + // Select 2^N elements, for the largest possible N. + SVE_POW2 = 0x0, + // Each VL<N> selects exactly N elements if possible, or zero if N is greater + // than the number of elements. Note that the encoding values for VL<N> are + // not linearly related to N. + SVE_VL1 = 0x1, + SVE_VL2 = 0x2, + SVE_VL3 = 0x3, + SVE_VL4 = 0x4, + SVE_VL5 = 0x5, + SVE_VL6 = 0x6, + SVE_VL7 = 0x7, + SVE_VL8 = 0x8, + SVE_VL16 = 0x9, + SVE_VL32 = 0xa, + SVE_VL64 = 0xb, + SVE_VL128 = 0xc, + SVE_VL256 = 0xd, + // Each MUL<N> selects the largest multiple of N elements that the vector + // length supports. Note that for D-sized lanes, this can be zero. + SVE_MUL4 = 0x1d, + SVE_MUL3 = 0x1e, + // Select all elements. + SVE_ALL = 0x1f +}; + // Instruction enumerations. // // These are the masks that define a class of instructions, and the list of @@ -503,6 +613,14 @@ enum NEONScalarFormatField { NEON_D = 0x00C00000 }; +enum SVESizeField { + SVESizeFieldMask = 0x00C00000, + SVE_B = 0x00000000, + SVE_H = 0x00400000, + SVE_S = 0x00800000, + SVE_D = 0x00C00000 +}; + // PC relative addressing. 
enum PCRelAddressingOp { PCRelAddressingFixed = 0x10000000, @@ -531,8 +649,8 @@ enum AddSubOp { enum AddSubImmediateOp { AddSubImmediateFixed = 0x11000000, - AddSubImmediateFMask = 0x1F000000, - AddSubImmediateMask = 0xFF000000, + AddSubImmediateFMask = 0x1F800000, + AddSubImmediateMask = 0xFF800000, #define ADD_SUB_IMMEDIATE(A) \ A##_w_imm = AddSubImmediateFixed | A, \ A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits @@ -2660,11 +2778,1626 @@ enum NEONScalarShiftImmediateOp { NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm }; +enum SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsOp { + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed = 0x84A00000, + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000, + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000, + LD1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed, + LDFF1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00002000, + LD1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, + LDFF1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsOp { + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed = 0x85200000, + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000, + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000, + LD1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, + LDFF1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsOp { + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed = 0x84000000, + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE208000, + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask = 0xFFA0E000, + LD1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00002000, + LD1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00004000, + LDFF1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00006000, + LD1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00806000, + LD1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01006000 +}; + +enum SVE32BitGatherLoad_VectorPlusImmOp { + SVE32BitGatherLoad_VectorPlusImmFixed = 0x84208000, + SVE32BitGatherLoad_VectorPlusImmFMask = 0xFE608000, + SVE32BitGatherLoad_VectorPlusImmMask = 0xFFE0E000, + LD1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed, + LDFF1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00002000, + LD1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00004000, + LDFF1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00006000, + LD1SH_z_p_ai_s = 
SVE32BitGatherLoad_VectorPlusImmFixed | 0x00800000, + LDFF1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00802000, + LD1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00804000, + LDFF1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00806000, + LD1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01004000, + LDFF1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01006000 +}; + +enum SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsOp { + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed = 0x84200000, + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08010, + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E010, + PRFB_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed, + PRFH_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00002000, + PRFW_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00004000, + PRFD_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE32BitGatherPrefetch_VectorPlusImmOp { + SVE32BitGatherPrefetch_VectorPlusImmFixed = 0x8400E000, + SVE32BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010, + SVE32BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010, + PRFB_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed, + PRFH_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x00800000, + PRFW_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01000000, + PRFD_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01800000 +}; + +enum SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsOp { + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed = 0xE4608000, + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFMask = 0xFE60A000, + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask = 0xFFE0A000, + ST1H_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x01000000 +}; + +enum SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsOp { + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed = 0xE4408000, + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE60A000, + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask = 0xFFE0A000, + ST1B_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed, + ST1H_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x01000000 +}; + +enum SVE32BitScatterStore_VectorPlusImmOp { + SVE32BitScatterStore_VectorPlusImmFixed = 0xE460A000, + SVE32BitScatterStore_VectorPlusImmFMask = 0xFE60E000, + SVE32BitScatterStore_VectorPlusImmMask = 0xFFE0E000, + ST1B_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed, + ST1H_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x00800000, + ST1W_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x01000000 +}; + +enum SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed = 0xC4200000, + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFMask = 0xFE208000, + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask = 0xFFA0E000, + LD1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_x32_scaled = 
SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000, + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFMask = 0xFE608000, + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000, + LD1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed = 0xC4408000, + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE608000, + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000, + LD1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00002000, + LD1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00004000, + LDFF1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00006000, + LD1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_64_unscaled = 
SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsOp { + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xC4000000, + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE208000, + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFA0E000, + LD1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00002000, + LD1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00004000, + LDFF1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00006000, + LD1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000, + LDFF1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00802000, + LD1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00804000, + LDFF1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00806000, + LD1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000, + LDFF1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01002000, + LD1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01004000, + LDFF1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01006000, + LD1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01804000, + LDFF1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01806000 +}; + +enum SVE64BitGatherLoad_VectorPlusImmOp { + SVE64BitGatherLoad_VectorPlusImmFixed = 0xC4208000, + SVE64BitGatherLoad_VectorPlusImmFMask = 0xFE608000, + SVE64BitGatherLoad_VectorPlusImmMask = 0xFFE0E000, + LD1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed, + LDFF1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00002000, + LD1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00004000, + LDFF1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00006000, + LD1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00800000, + LDFF1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00802000, + LD1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00804000, + LDFF1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00806000, + LD1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01000000, + LDFF1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01002000, + LD1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01004000, + LDFF1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01006000, + LD1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01804000, + LDFF1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01806000 +}; + +enum SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsOp { + SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000, + SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFMask = 0xFFE08010, + 
SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E010, + PRFB_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed, + PRFH_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00002000, + PRFW_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00004000, + PRFD_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsOp { + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xC4200000, + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFFA08010, + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFA0E010, + PRFB_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed, + PRFH_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00002000, + PRFW_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00004000, + PRFD_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00006000 +}; + +enum SVE64BitGatherPrefetch_VectorPlusImmOp { + SVE64BitGatherPrefetch_VectorPlusImmFixed = 0xC400E000, + SVE64BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010, + SVE64BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010, + PRFB_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed, + PRFH_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x00800000, + PRFW_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01000000, + PRFD_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01800000 +}; + +enum SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsOp { + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed = 0xE420A000, + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFMask = 0xFE60E000, + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000, + ST1H_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01000000, + ST1D_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01800000 +}; + +enum SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsOp { + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed = 0xE400A000, + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE60E000, + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000, + ST1B_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed, + ST1H_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000, + ST1D_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01800000 +}; + +enum SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsOp { + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xE4208000, + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFE60A000, + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFE0A000, + ST1H_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00800000, + ST1W_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01000000, + ST1D_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed 
| 0x01800000
+};
+
+enum SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsOp {
+  SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xE4008000,
+  SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE60A000,
+  SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFE0A000,
+  ST1B_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed,
+  ST1H_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000,
+  ST1W_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000,
+  ST1D_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01800000
+};
+
+enum SVE64BitScatterStore_VectorPlusImmOp {
+  SVE64BitScatterStore_VectorPlusImmFixed = 0xE440A000,
+  SVE64BitScatterStore_VectorPlusImmFMask = 0xFE60E000,
+  SVE64BitScatterStore_VectorPlusImmMask = 0xFFE0E000,
+  ST1B_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed,
+  ST1H_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x00800000,
+  ST1W_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01000000,
+  ST1D_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01800000
+};
+
+enum SVEAddressGenerationOp {
+  SVEAddressGenerationFixed = 0x0420A000,
+  SVEAddressGenerationFMask = 0xFF20F000,
+  SVEAddressGenerationMask = 0xFFE0F000,
+  ADR_z_az_d_s32_scaled = SVEAddressGenerationFixed,
+  ADR_z_az_d_u32_scaled = SVEAddressGenerationFixed | 0x00400000,
+  ADR_z_az_s_same_scaled = SVEAddressGenerationFixed | 0x00800000,
+  ADR_z_az_d_same_scaled = SVEAddressGenerationFixed | 0x00C00000
+};
+
+enum SVEBitwiseLogicalUnpredicatedOp {
+  SVEBitwiseLogicalUnpredicatedFixed = 0x04202000,
+  SVEBitwiseLogicalUnpredicatedFMask = 0xFF20E000,
+  SVEBitwiseLogicalUnpredicatedMask = 0xFFE0FC00,
+  AND_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00001000,
+  ORR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00401000,
+  EOR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00801000,
+  BIC_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00C01000
+};
+
+enum SVEBitwiseLogicalWithImm_UnpredicatedOp {
+  SVEBitwiseLogicalWithImm_UnpredicatedFixed = 0x05000000,
+  SVEBitwiseLogicalWithImm_UnpredicatedFMask = 0xFF3C0000,
+  SVEBitwiseLogicalWithImm_UnpredicatedMask = 0xFFFC0000,
+  ORR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed,
+  EOR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00400000,
+  AND_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00800000
+};
+
+enum SVEBitwiseLogical_PredicatedOp {
+  SVEBitwiseLogical_PredicatedFixed = 0x04180000,
+  SVEBitwiseLogical_PredicatedFMask = 0xFF38E000,
+  SVEBitwiseLogical_PredicatedMask = 0xFF3FE000,
+  ORR_z_p_zz = SVEBitwiseLogical_PredicatedFixed,
+  EOR_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00010000,
+  AND_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00020000,
+  BIC_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00030000
+};
+
+enum SVEBitwiseShiftByImm_PredicatedOp {
+  SVEBitwiseShiftByImm_PredicatedFixed = 0x04008000,
+  SVEBitwiseShiftByImm_PredicatedFMask = 0xFF30E000,
+  SVEBitwiseShiftByImm_PredicatedMask = 0xFF3FE000,
+  ASR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed,
+  LSR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00010000,
+  LSL_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00030000,
+  ASRD_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00040000
+};
+
+enum SVEBitwiseShiftByVector_PredicatedOp {
+  SVEBitwiseShiftByVector_PredicatedFixed = 0x04108000,
+  SVEBitwiseShiftByVector_PredicatedFMask = 0xFF38E000,
+  SVEBitwiseShiftByVector_PredicatedMask = 0xFF3FE000,
+  ASR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed,
+  LSR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00010000,
+  LSL_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00030000,
+  ASRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00040000,
+  LSRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00050000,
+  LSLR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00070000
+};
+
+enum SVEBitwiseShiftByWideElements_PredicatedOp {
+  SVEBitwiseShiftByWideElements_PredicatedFixed = 0x04188000,
+  SVEBitwiseShiftByWideElements_PredicatedFMask = 0xFF38E000,
+  SVEBitwiseShiftByWideElements_PredicatedMask = 0xFF3FE000,
+  ASR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed,
+  LSR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00010000,
+  LSL_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00030000
+};
+
+enum SVEBitwiseShiftUnpredicatedOp {
+  SVEBitwiseShiftUnpredicatedFixed = 0x04208000,
+  SVEBitwiseShiftUnpredicatedFMask = 0xFF20E000,
+  SVEBitwiseShiftUnpredicatedMask = 0xFF20FC00,
+  ASR_z_zw = SVEBitwiseShiftUnpredicatedFixed,
+  LSR_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000400,
+  LSL_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000C00,
+  ASR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001000,
+  LSR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001400,
+  LSL_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEBroadcastBitmaskImmOp {
+  SVEBroadcastBitmaskImmFixed = 0x05C00000,
+  SVEBroadcastBitmaskImmFMask = 0xFFFC0000,
+  SVEBroadcastBitmaskImmMask = 0xFFFC0000,
+  DUPM_z_i = SVEBroadcastBitmaskImmFixed
+};
+
+enum SVEBroadcastFPImm_UnpredicatedOp {
+  SVEBroadcastFPImm_UnpredicatedFixed = 0x2539C000,
+  SVEBroadcastFPImm_UnpredicatedFMask = 0xFF39C000,
+  SVEBroadcastFPImm_UnpredicatedMask = 0xFF3FE000,
+  FDUP_z_i = SVEBroadcastFPImm_UnpredicatedFixed
+};
+
+enum SVEBroadcastGeneralRegisterOp {
+  SVEBroadcastGeneralRegisterFixed = 0x05203800,
+  SVEBroadcastGeneralRegisterFMask = 0xFF3FFC00,
+  SVEBroadcastGeneralRegisterMask = 0xFF3FFC00,
+  DUP_z_r = SVEBroadcastGeneralRegisterFixed
+};
+
+enum SVEBroadcastIndexElementOp {
+  SVEBroadcastIndexElementFixed = 0x05202000,
+  SVEBroadcastIndexElementFMask = 0xFF20FC00,
+  SVEBroadcastIndexElementMask = 0xFF20FC00,
+  DUP_z_zi = SVEBroadcastIndexElementFixed
+};
+
+enum SVEBroadcastIntImm_UnpredicatedOp {
+  SVEBroadcastIntImm_UnpredicatedFixed = 0x2538C000,
+  SVEBroadcastIntImm_UnpredicatedFMask = 0xFF39C000,
+  SVEBroadcastIntImm_UnpredicatedMask = 0xFF3FC000,
+  DUP_z_i = SVEBroadcastIntImm_UnpredicatedFixed
+};
+
+enum SVECompressActiveElementsOp {
+  SVECompressActiveElementsFixed = 0x05A18000,
+  SVECompressActiveElementsFMask = 0xFFBFE000,
+  SVECompressActiveElementsMask = 0xFFBFE000,
+  COMPACT_z_p_z = SVECompressActiveElementsFixed
+};
+
+enum SVEConditionallyBroadcastElementToVectorOp {
+  SVEConditionallyBroadcastElementToVectorFixed = 0x05288000,
+  SVEConditionallyBroadcastElementToVectorFMask = 0xFF3EE000,
+  SVEConditionallyBroadcastElementToVectorMask = 0xFF3FE000,
+  CLASTA_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed,
+  CLASTB_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed | 0x00010000
+};
+
+enum SVEConditionallyExtractElementToGeneralRegisterOp {
+  SVEConditionallyExtractElementToGeneralRegisterFixed = 0x0530A000,
+  SVEConditionallyExtractElementToGeneralRegisterFMask = 0xFF3EE000,
+  SVEConditionallyExtractElementToGeneralRegisterMask = 0xFF3FE000,
+  CLASTA_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed,
+  CLASTB_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed | 0x00010000
+};
+
+enum SVEConditionallyExtractElementToSIMDFPScalarOp {
+  SVEConditionallyExtractElementToSIMDFPScalarFixed = 0x052A8000,
+  SVEConditionallyExtractElementToSIMDFPScalarFMask = 0xFF3EE000,
+  SVEConditionallyExtractElementToSIMDFPScalarMask = 0xFF3FE000,
+  CLASTA_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed,
+  CLASTB_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed | 0x00010000
+};
+
+enum SVEConditionallyTerminateScalarsOp {
+  SVEConditionallyTerminateScalarsFixed = 0x25202000,
+  SVEConditionallyTerminateScalarsFMask = 0xFF20FC0F,
+  SVEConditionallyTerminateScalarsMask = 0xFFA0FC1F,
+  CTERMEQ_rr = SVEConditionallyTerminateScalarsFixed | 0x00800000,
+  CTERMNE_rr = SVEConditionallyTerminateScalarsFixed | 0x00800010
+};
+
+enum SVEConstructivePrefix_UnpredicatedOp {
+  SVEConstructivePrefix_UnpredicatedFixed = 0x0420BC00,
+  SVEConstructivePrefix_UnpredicatedFMask = 0xFF20FC00,
+  SVEConstructivePrefix_UnpredicatedMask = 0xFFFFFC00,
+  MOVPRFX_z_z = SVEConstructivePrefix_UnpredicatedFixed
+};
+
+enum SVEContiguousFirstFaultLoad_ScalarPlusScalarOp {
+  SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed = 0xA4006000,
+  SVEContiguousFirstFaultLoad_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEContiguousFirstFaultLoad_ScalarPlusScalarMask = 0xFFE0E000,
+  LDFF1B_z_p_br_u8 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed,
+  LDFF1B_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00200000,
+  LDFF1B_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00400000,
+  LDFF1B_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00600000,
+  LDFF1SW_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00800000,
+  LDFF1H_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00A00000,
+  LDFF1H_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00C00000,
+  LDFF1H_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00E00000,
+  LDFF1SH_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01000000,
+  LDFF1SH_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01200000,
+  LDFF1W_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01400000,
+  LDFF1W_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01600000,
+  LDFF1SB_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01800000,
+  LDFF1SB_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01A00000,
+  LDFF1SB_z_p_br_s16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01C00000,
+  LDFF1D_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEContiguousLoad_ScalarPlusImmOp {
+  SVEContiguousLoad_ScalarPlusImmFixed = 0xA400A000,
+  SVEContiguousLoad_ScalarPlusImmFMask = 0xFE10E000,
+  SVEContiguousLoad_ScalarPlusImmMask = 0xFFF0E000,
+  LD1B_z_p_bi_u8 = SVEContiguousLoad_ScalarPlusImmFixed,
+  LD1B_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00200000,
+  LD1B_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00400000,
+  LD1B_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00600000,
+  LD1SW_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00800000,
+  LD1H_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00A00000,
+  LD1H_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00C00000,
+  LD1H_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00E00000,
+  LD1SH_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01000000,
+  LD1SH_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01200000,
+  LD1W_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01400000,
+  LD1W_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01600000,
+  LD1SB_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01800000,
+  LD1SB_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01A00000,
+  LD1SB_z_p_bi_s16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01C00000,
+  LD1D_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEContiguousLoad_ScalarPlusScalarOp {
+  SVEContiguousLoad_ScalarPlusScalarFixed = 0xA4004000,
+  SVEContiguousLoad_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEContiguousLoad_ScalarPlusScalarMask = 0xFFE0E000,
+  LD1B_z_p_br_u8 = SVEContiguousLoad_ScalarPlusScalarFixed,
+  LD1B_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00200000,
+  LD1B_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00400000,
+  LD1B_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00600000,
+  LD1SW_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00800000,
+  LD1H_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00A00000,
+  LD1H_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00C00000,
+  LD1H_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00E00000,
+  LD1SH_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01000000,
+  LD1SH_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01200000,
+  LD1W_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01400000,
+  LD1W_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01600000,
+  LD1SB_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01800000,
+  LD1SB_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01A00000,
+  LD1SB_z_p_br_s16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01C00000,
+  LD1D_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEContiguousNonFaultLoad_ScalarPlusImmOp {
+  SVEContiguousNonFaultLoad_ScalarPlusImmFixed = 0xA410A000,
+  SVEContiguousNonFaultLoad_ScalarPlusImmFMask = 0xFE10E000,
+  SVEContiguousNonFaultLoad_ScalarPlusImmMask = 0xFFF0E000,
+  LDNF1B_z_p_bi_u8 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed,
+  LDNF1B_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00200000,
+  LDNF1B_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00400000,
+  LDNF1B_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00600000,
+  LDNF1SW_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00800000,
+  LDNF1H_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00A00000,
+  LDNF1H_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00C00000,
+  LDNF1H_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00E00000,
+  LDNF1SH_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01000000,
+  LDNF1SH_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01200000,
+  LDNF1W_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01400000,
+  LDNF1W_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01600000,
+  LDNF1SB_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01800000,
+  LDNF1SB_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01A00000,
+  LDNF1SB_z_p_bi_s16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01C00000,
+  LDNF1D_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEContiguousNonTemporalLoad_ScalarPlusImmOp {
+  SVEContiguousNonTemporalLoad_ScalarPlusImmFixed = 0xA400E000,
+  SVEContiguousNonTemporalLoad_ScalarPlusImmFMask = 0xFE70E000,
+  SVEContiguousNonTemporalLoad_ScalarPlusImmMask = 0xFFF0E000,
+  LDNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed,
+  LDNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x00800000,
+  LDNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01000000,
+  LDNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalLoad_ScalarPlusScalarOp {
+  SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed = 0xA400C000,
+  SVEContiguousNonTemporalLoad_ScalarPlusScalarFMask = 0xFE60E000,
+  SVEContiguousNonTemporalLoad_ScalarPlusScalarMask = 0xFFE0E000,
+  LDNT1B_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed,
+  LDNT1H_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x00800000,
+  LDNT1W_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01000000,
+  LDNT1D_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalStore_ScalarPlusImmOp {
+  SVEContiguousNonTemporalStore_ScalarPlusImmFixed = 0xE410E000,
+  SVEContiguousNonTemporalStore_ScalarPlusImmFMask = 0xFE70E000,
+  SVEContiguousNonTemporalStore_ScalarPlusImmMask = 0xFFF0E000,
+  STNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed,
+  STNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x00800000,
+  STNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01000000,
+  STNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousNonTemporalStore_ScalarPlusScalarOp {
+  SVEContiguousNonTemporalStore_ScalarPlusScalarFixed = 0xE4006000,
+  SVEContiguousNonTemporalStore_ScalarPlusScalarFMask = 0xFE60E000,
+  SVEContiguousNonTemporalStore_ScalarPlusScalarMask = 0xFFE0E000,
+  STNT1B_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed,
+  STNT1H_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x00800000,
+  STNT1W_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01000000,
+  STNT1D_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousPrefetch_ScalarPlusImmOp {
+  SVEContiguousPrefetch_ScalarPlusImmFixed = 0x85C00000,
+  SVEContiguousPrefetch_ScalarPlusImmFMask = 0xFFC08010,
+  SVEContiguousPrefetch_ScalarPlusImmMask = 0xFFC0E010,
+  PRFB_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed,
+  PRFH_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00002000,
+  PRFW_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00004000,
+  PRFD_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00006000
+};
+
+enum SVEContiguousPrefetch_ScalarPlusScalarOp {
+  SVEContiguousPrefetch_ScalarPlusScalarFixed = 0x8400C000,
+  SVEContiguousPrefetch_ScalarPlusScalarFMask = 0xFE60E010,
+  SVEContiguousPrefetch_ScalarPlusScalarMask = 0xFFE0E010,
+  PRFB_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed,
+  PRFH_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x00800000,
+  PRFW_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01000000,
+  PRFD_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVEContiguousStore_ScalarPlusImmOp {
+  SVEContiguousStore_ScalarPlusImmFixed = 0xE400E000,
+  SVEContiguousStore_ScalarPlusImmFMask = 0xFE10E000,
+  SVEContiguousStore_ScalarPlusImmMask = 0xFF90E000,
+  ST1B_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed,
+  ST1H_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x00800000,
+  ST1W_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01000000,
+  ST1D_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVEContiguousStore_ScalarPlusScalarOp {
+  SVEContiguousStore_ScalarPlusScalarFixed = 0xE4004000,
+  SVEContiguousStore_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEContiguousStore_ScalarPlusScalarMask = 0xFF80E000,
+  ST1B_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed,
+  ST1H_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x00800000,
+  ST1W_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01000000,
+  ST1D_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVECopyFPImm_PredicatedOp {
+  SVECopyFPImm_PredicatedFixed = 0x0510C000,
+  SVECopyFPImm_PredicatedFMask = 0xFF30E000,
+  SVECopyFPImm_PredicatedMask = 0xFF30E000,
+  FCPY_z_p_i = SVECopyFPImm_PredicatedFixed
+};
+
+enum SVECopyGeneralRegisterToVector_PredicatedOp {
+  SVECopyGeneralRegisterToVector_PredicatedFixed = 0x0528A000,
+  SVECopyGeneralRegisterToVector_PredicatedFMask = 0xFF3FE000,
+  SVECopyGeneralRegisterToVector_PredicatedMask = 0xFF3FE000,
+  CPY_z_p_r = SVECopyGeneralRegisterToVector_PredicatedFixed
+};
+
+enum SVECopyIntImm_PredicatedOp {
+  SVECopyIntImm_PredicatedFixed = 0x05100000,
+  SVECopyIntImm_PredicatedFMask = 0xFF308000,
+  SVECopyIntImm_PredicatedMask = 0xFF308000,
+  CPY_z_p_i = SVECopyIntImm_PredicatedFixed
+};
+
+enum SVECopySIMDFPScalarRegisterToVector_PredicatedOp {
+  SVECopySIMDFPScalarRegisterToVector_PredicatedFixed = 0x05208000,
+  SVECopySIMDFPScalarRegisterToVector_PredicatedFMask = 0xFF3FE000,
+  SVECopySIMDFPScalarRegisterToVector_PredicatedMask = 0xFF3FE000,
+  CPY_z_p_v = SVECopySIMDFPScalarRegisterToVector_PredicatedFixed
+};
+
+enum SVEElementCountOp {
+  SVEElementCountFixed = 0x0420E000,
+  SVEElementCountFMask = 0xFF30F800,
+  SVEElementCountMask = 0xFFF0FC00,
+  CNTB_r_s = SVEElementCountFixed,
+  CNTH_r_s = SVEElementCountFixed | 0x00400000,
+  CNTW_r_s = SVEElementCountFixed | 0x00800000,
+  CNTD_r_s = SVEElementCountFixed | 0x00C00000
+};
+
+enum SVEExtractElementToGeneralRegisterOp {
+  SVEExtractElementToGeneralRegisterFixed = 0x0520A000,
+  SVEExtractElementToGeneralRegisterFMask = 0xFF3EE000,
+  SVEExtractElementToGeneralRegisterMask = 0xFF3FE000,
+  LASTA_r_p_z = SVEExtractElementToGeneralRegisterFixed,
+  LASTB_r_p_z = SVEExtractElementToGeneralRegisterFixed | 0x00010000
+};
+
+enum SVEExtractElementToSIMDFPScalarRegisterOp {
+  SVEExtractElementToSIMDFPScalarRegisterFixed = 0x05228000,
+  SVEExtractElementToSIMDFPScalarRegisterFMask = 0xFF3EE000,
+  SVEExtractElementToSIMDFPScalarRegisterMask = 0xFF3FE000,
+  LASTA_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed,
+  LASTB_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed | 0x00010000
+};
+
+enum SVEFFRInitialiseOp {
+  SVEFFRInitialiseFixed = 0x252C9000,
+  SVEFFRInitialiseFMask = 0xFF3FFFFF,
+  SVEFFRInitialiseMask = 0xFFFFFFFF,
+  SETFFR_f = SVEFFRInitialiseFixed
+};
+
+enum SVEFFRWriteFromPredicateOp {
+  SVEFFRWriteFromPredicateFixed = 0x25289000,
+  SVEFFRWriteFromPredicateFMask = 0xFF3FFE1F,
+  SVEFFRWriteFromPredicateMask = 0xFFFFFE1F,
+  WRFFR_f_p = SVEFFRWriteFromPredicateFixed
+};
+
+enum SVEFPAccumulatingReductionOp {
+  SVEFPAccumulatingReductionFixed = 0x65182000,
+  SVEFPAccumulatingReductionFMask = 0xFF38E000,
+  SVEFPAccumulatingReductionMask = 0xFF3FE000,
+  FADDA_v_p_z = SVEFPAccumulatingReductionFixed
+};
+
+enum SVEFPArithmeticUnpredicatedOp {
+  SVEFPArithmeticUnpredicatedFixed = 0x65000000,
+  SVEFPArithmeticUnpredicatedFMask = 0xFF20E000,
+  SVEFPArithmeticUnpredicatedMask = 0xFF20FC00,
+  FADD_z_zz = SVEFPArithmeticUnpredicatedFixed,
+  FSUB_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000400,
+  FMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000800,
+  FTSMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000C00,
+  FRECPS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001800,
+  FRSQRTS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEFPArithmeticWithImm_PredicatedOp {
+  SVEFPArithmeticWithImm_PredicatedFixed = 0x65188000,
+  SVEFPArithmeticWithImm_PredicatedFMask = 0xFF38E3C0,
+  SVEFPArithmeticWithImm_PredicatedMask = 0xFF3FE3C0,
+  FADD_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed,
+  FSUB_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00010000,
+  FMUL_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00020000,
+  FSUBR_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00030000,
+  FMAXNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00040000,
+  FMINNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00050000,
+  FMAX_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00060000,
+  FMIN_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00070000
+};
+
+enum SVEFPArithmetic_PredicatedOp {
+  SVEFPArithmetic_PredicatedFixed = 0x65008000,
+  SVEFPArithmetic_PredicatedFMask = 0xFF30E000,
+  SVEFPArithmetic_PredicatedMask = 0xFF3FE000,
+  FADD_z_p_zz = SVEFPArithmetic_PredicatedFixed,
+  FSUB_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00010000,
+  FMUL_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00020000,
+  FSUBR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00030000,
+  FMAXNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00040000,
+  FMINNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00050000,
+  FMAX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00060000,
+  FMIN_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00070000,
+  FABD_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00080000,
+  FSCALE_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00090000,
+  FMULX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000A0000,
+  FDIVR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000C0000,
+  FDIV_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000D0000
+};
+
+enum SVEFPCompareVectorsOp {
+  SVEFPCompareVectorsFixed = 0x65004000,
+  SVEFPCompareVectorsFMask = 0xFF204000,
+  SVEFPCompareVectorsMask = 0xFF20E010,
+  FCMGE_p_p_zz = SVEFPCompareVectorsFixed,
+  FCMGT_p_p_zz = SVEFPCompareVectorsFixed | 0x00000010,
+  FCMEQ_p_p_zz = SVEFPCompareVectorsFixed | 0x00002000,
+  FCMNE_p_p_zz = SVEFPCompareVectorsFixed | 0x00002010,
+  FCMUO_p_p_zz = SVEFPCompareVectorsFixed | 0x00008000,
+  FACGE_p_p_zz = SVEFPCompareVectorsFixed | 0x00008010,
+  FACGT_p_p_zz = SVEFPCompareVectorsFixed | 0x0000A010
+};
+
+enum SVEFPCompareWithZeroOp {
+  SVEFPCompareWithZeroFixed = 0x65102000,
+  SVEFPCompareWithZeroFMask = 0xFF38E000,
+  SVEFPCompareWithZeroMask = 0xFF3FE010,
+  FCMGE_p_p_z0 = SVEFPCompareWithZeroFixed,
+  FCMGT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00000010,
+  FCMLT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010000,
+  FCMLE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010010,
+  FCMEQ_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00020000,
+  FCMNE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00030000
+};
+
+enum SVEFPComplexAdditionOp {
+  SVEFPComplexAdditionFixed = 0x64008000,
+  SVEFPComplexAdditionFMask = 0xFF3EE000,
+  SVEFPComplexAdditionMask = 0xFF3EE000,
+  FCADD_z_p_zz = SVEFPComplexAdditionFixed
+};
+
+enum SVEFPComplexMulAddOp {
+  SVEFPComplexMulAddFixed = 0x64000000,
+  SVEFPComplexMulAddFMask = 0xFF208000,
+  SVEFPComplexMulAddMask = 0xFF208000,
+  FCMLA_z_p_zzz = SVEFPComplexMulAddFixed
+};
+
+enum SVEFPComplexMulAddIndexOp {
+  SVEFPComplexMulAddIndexFixed = 0x64201000,
+  SVEFPComplexMulAddIndexFMask = 0xFF20F000,
+  SVEFPComplexMulAddIndexMask = 0xFFE0F000,
+  FCMLA_z_zzzi_h = SVEFPComplexMulAddIndexFixed | 0x00800000,
+  FCMLA_z_zzzi_s = SVEFPComplexMulAddIndexFixed | 0x00C00000
+};
+
+enum SVEFPConvertPrecisionOp {
+  SVEFPConvertPrecisionFixed = 0x6508A000,
+  SVEFPConvertPrecisionFMask = 0xFF3CE000,
+  SVEFPConvertPrecisionMask = 0xFFFFE000,
+  FCVT_z_p_z_s2h = SVEFPConvertPrecisionFixed | 0x00800000,
+  FCVT_z_p_z_h2s = SVEFPConvertPrecisionFixed | 0x00810000,
+  FCVT_z_p_z_d2h = SVEFPConvertPrecisionFixed | 0x00C00000,
+  FCVT_z_p_z_h2d = SVEFPConvertPrecisionFixed | 0x00C10000,
+  FCVT_z_p_z_d2s = SVEFPConvertPrecisionFixed | 0x00C20000,
+  FCVT_z_p_z_s2d = SVEFPConvertPrecisionFixed | 0x00C30000
+};
+
+enum SVEFPConvertToIntOp {
+  SVEFPConvertToIntFixed = 0x6518A000,
+  SVEFPConvertToIntFMask = 0xFF38E000,
+  SVEFPConvertToIntMask = 0xFFFFE000,
+  FCVTZS_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00420000,
+  FCVTZU_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00430000,
+  FCVTZS_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00440000,
+  FCVTZU_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00450000,
+  FCVTZS_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00460000,
+  FCVTZU_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00470000,
+  FCVTZS_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00840000,
+  FCVTZU_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00850000,
+  FCVTZS_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C00000,
+  FCVTZU_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C10000,
+  FCVTZS_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C40000,
+  FCVTZU_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C50000,
+  FCVTZS_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C60000,
+  FCVTZU_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C70000
+};
+
+enum SVEFPExponentialAcceleratorOp {
+  SVEFPExponentialAcceleratorFixed = 0x0420B800,
+  SVEFPExponentialAcceleratorFMask = 0xFF20FC00,
+  SVEFPExponentialAcceleratorMask = 0xFF3FFC00,
+  FEXPA_z_z = SVEFPExponentialAcceleratorFixed
+};
+
+enum SVEFPFastReductionOp {
+  SVEFPFastReductionFixed = 0x65002000,
+  SVEFPFastReductionFMask = 0xFF38E000,
+  SVEFPFastReductionMask = 0xFF3FE000,
+  FADDV_v_p_z = SVEFPFastReductionFixed,
+  FMAXNMV_v_p_z = SVEFPFastReductionFixed | 0x00040000,
+  FMINNMV_v_p_z = SVEFPFastReductionFixed | 0x00050000,
+  FMAXV_v_p_z = SVEFPFastReductionFixed | 0x00060000,
+  FMINV_v_p_z = SVEFPFastReductionFixed | 0x00070000
+};
+
+enum SVEFPMulAddOp {
+  SVEFPMulAddFixed = 0x65200000,
+  SVEFPMulAddFMask = 0xFF200000,
+  SVEFPMulAddMask = 0xFF20E000,
+  FMLA_z_p_zzz = SVEFPMulAddFixed,
+  FMLS_z_p_zzz = SVEFPMulAddFixed | 0x00002000,
+  FNMLA_z_p_zzz = SVEFPMulAddFixed | 0x00004000,
+  FNMLS_z_p_zzz = SVEFPMulAddFixed | 0x00006000,
+  FMAD_z_p_zzz = SVEFPMulAddFixed | 0x00008000,
+  FMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000A000,
+  FNMAD_z_p_zzz = SVEFPMulAddFixed | 0x0000C000,
+  FNMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000E000
+};
+
+enum SVEFPMulAddIndexOp {
+  SVEFPMulAddIndexFixed = 0x64200000,
+  SVEFPMulAddIndexFMask = 0xFF20F800,
+  SVEFPMulAddIndexMask = 0xFFE0FC00,
+  FMLA_z_zzzi_h = SVEFPMulAddIndexFixed,
+  FMLA_z_zzzi_h_i3h = FMLA_z_zzzi_h | 0x00400000,
+  FMLS_z_zzzi_h = SVEFPMulAddIndexFixed | 0x00000400,
+  FMLS_z_zzzi_h_i3h = FMLS_z_zzzi_h | 0x00400000,
+  FMLA_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800000,
+  FMLS_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800400,
+  FMLA_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00000,
+  FMLS_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00400
+};
+
+enum SVEFPMulIndexOp {
+  SVEFPMulIndexFixed = 0x64202000,
+  SVEFPMulIndexFMask = 0xFF20FC00,
+  SVEFPMulIndexMask = 0xFFE0FC00,
+  FMUL_z_zzi_h = SVEFPMulIndexFixed,
+  FMUL_z_zzi_h_i3h = FMUL_z_zzi_h | 0x00400000,
+  FMUL_z_zzi_s = SVEFPMulIndexFixed | 0x00800000,
+  FMUL_z_zzi_d = SVEFPMulIndexFixed | 0x00C00000
+};
+
+enum SVEFPRoundToIntegralValueOp {
+  SVEFPRoundToIntegralValueFixed = 0x6500A000,
+  SVEFPRoundToIntegralValueFMask = 0xFF38E000,
+  SVEFPRoundToIntegralValueMask = 0xFF3FE000,
+  FRINTN_z_p_z = SVEFPRoundToIntegralValueFixed,
+  FRINTP_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00010000,
+  FRINTM_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00020000,
+  FRINTZ_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00030000,
+  FRINTA_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00040000,
+  FRINTX_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00060000,
+  FRINTI_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00070000
+};
+
+enum SVEFPTrigMulAddCoefficientOp {
+  SVEFPTrigMulAddCoefficientFixed = 0x65108000,
+  SVEFPTrigMulAddCoefficientFMask = 0xFF38FC00,
+  SVEFPTrigMulAddCoefficientMask = 0xFF38FC00,
+  FTMAD_z_zzi = SVEFPTrigMulAddCoefficientFixed
+};
+
+enum SVEFPTrigSelectCoefficientOp {
+  SVEFPTrigSelectCoefficientFixed = 0x0420B000,
+  SVEFPTrigSelectCoefficientFMask = 0xFF20F800,
+  SVEFPTrigSelectCoefficientMask = 0xFF20FC00,
+  FTSSEL_z_zz = SVEFPTrigSelectCoefficientFixed
+};
+
+enum SVEFPUnaryOpOp {
+  SVEFPUnaryOpFixed = 0x650CA000,
+  SVEFPUnaryOpFMask = 0xFF3CE000,
+  SVEFPUnaryOpMask = 0xFF3FE000,
+  FRECPX_z_p_z = SVEFPUnaryOpFixed,
+  FSQRT_z_p_z = SVEFPUnaryOpFixed | 0x00010000
+};
+
+enum SVEFPUnaryOpUnpredicatedOp {
+  SVEFPUnaryOpUnpredicatedFixed = 0x65083000,
+  SVEFPUnaryOpUnpredicatedFMask = 0xFF38F000,
+  SVEFPUnaryOpUnpredicatedMask = 0xFF3FFC00,
+  FRECPE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00060000,
+  FRSQRTE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00070000
+};
+
+enum SVEIncDecByPredicateCountOp {
+  SVEIncDecByPredicateCountFixed = 0x25288000,
+  SVEIncDecByPredicateCountFMask = 0xFF38F000,
+  SVEIncDecByPredicateCountMask = 0xFF3FFE00,
+  SQINCP_z_p_z = SVEIncDecByPredicateCountFixed,
+  SQINCP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00000800,
+  SQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00000C00,
+  UQINCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00010000,
+  UQINCP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00010800,
+  UQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00010C00,
+  SQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00020000,
+  SQDECP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00020800,
+  SQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00020C00,
+  UQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00030000,
+  UQDECP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00030800,
+  UQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00030C00,
+  INCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00040000,
+  INCP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00040800,
+  DECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00050000,
+  DECP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00050800
+};
+
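Each group in these tables pairs a Fixed value (the group's constant bits) with the operand fields an emitter fills in. As a rough sketch of how one of the constants above could be turned into a full encoding, assuming the usual A64/SVE operand layout (Zd at [4:0], Zn at [9:5], Zm at [20:16], element size at [23:22]) which is not itself stated by these tables:

// Sketch only: combines FADD_z_zz (SVEFPArithmeticUnpredicatedFixed, from the
// table above) with operand fields. The field positions are an assumption
// based on the common SVE layout, not part of the enum definitions.
#include <cstdint>

uint32_t EncodeFaddZZZ(uint32_t zd, uint32_t zn, uint32_t zm, uint32_t size) {
  const uint32_t kFADD_z_zz = 0x65000000;
  return kFADD_z_zz | (size << 22)  // 01: half, 10: single, 11: double.
                    | (zm << 16) | (zn << 5) | zd;
}
// EncodeFaddZZZ(0, 1, 2, 3) == 0x65C20020, i.e. "fadd z0.d, z1.d, z2.d".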
+enum SVEIncDecRegisterByElementCountOp {
+  SVEIncDecRegisterByElementCountFixed = 0x0430E000,
+  SVEIncDecRegisterByElementCountFMask = 0xFF30F800,
+  SVEIncDecRegisterByElementCountMask = 0xFFF0FC00,
+  INCB_r_rs = SVEIncDecRegisterByElementCountFixed,
+  DECB_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00000400,
+  INCH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400000,
+  DECH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400400,
+  INCW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800000,
+  DECW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800400,
+  INCD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00000,
+  DECD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00400
+};
+
+enum SVEIncDecVectorByElementCountOp {
+  SVEIncDecVectorByElementCountFixed = 0x0430C000,
+  SVEIncDecVectorByElementCountFMask = 0xFF30F800,
+  SVEIncDecVectorByElementCountMask = 0xFFF0FC00,
+  INCH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400000,
+  DECH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400400,
+  INCW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800000,
+  DECW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800400,
+  INCD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00000,
+  DECD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00400
+};
+
+enum SVEIndexGenerationOp {
+  SVEIndexGenerationFixed = 0x04204000,
+  SVEIndexGenerationFMask = 0xFF20F000,
+  SVEIndexGenerationMask = 0xFF20FC00,
+  INDEX_z_ii = SVEIndexGenerationFixed,
+  INDEX_z_ri = SVEIndexGenerationFixed | 0x00000400,
+  INDEX_z_ir = SVEIndexGenerationFixed | 0x00000800,
+  INDEX_z_rr = SVEIndexGenerationFixed | 0x00000C00
+};
+
+enum SVEInsertGeneralRegisterOp {
+  SVEInsertGeneralRegisterFixed = 0x05243800,
+  SVEInsertGeneralRegisterFMask = 0xFF3FFC00,
+  SVEInsertGeneralRegisterMask = 0xFF3FFC00,
+  INSR_z_r = SVEInsertGeneralRegisterFixed
+};
+
+enum SVEInsertSIMDFPScalarRegisterOp {
+  SVEInsertSIMDFPScalarRegisterFixed = 0x05343800,
+  SVEInsertSIMDFPScalarRegisterFMask = 0xFF3FFC00,
+  SVEInsertSIMDFPScalarRegisterMask = 0xFF3FFC00,
+  INSR_z_v = SVEInsertSIMDFPScalarRegisterFixed
+};
+
+enum SVEIntAddSubtractImm_UnpredicatedOp {
+  SVEIntAddSubtractImm_UnpredicatedFixed = 0x2520C000,
+  SVEIntAddSubtractImm_UnpredicatedFMask = 0xFF38C000,
+  SVEIntAddSubtractImm_UnpredicatedMask = 0xFF3FC000,
+  ADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed,
+  SUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00010000,
+  SUBR_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00030000,
+  SQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00040000,
+  UQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00050000,
+  SQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00060000,
+  UQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00070000
+};
+
+enum SVEIntAddSubtractVectors_PredicatedOp {
+  SVEIntAddSubtractVectors_PredicatedFixed = 0x04000000,
+  SVEIntAddSubtractVectors_PredicatedFMask = 0xFF38E000,
+  SVEIntAddSubtractVectors_PredicatedMask = 0xFF3FE000,
+  ADD_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed,
+  SUB_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00010000,
+  SUBR_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEIntArithmeticUnpredicatedOp {
+  SVEIntArithmeticUnpredicatedFixed = 0x04200000,
+  SVEIntArithmeticUnpredicatedFMask = 0xFF20E000,
+  SVEIntArithmeticUnpredicatedMask = 0xFF20FC00,
+  ADD_z_zz = SVEIntArithmeticUnpredicatedFixed,
+  SUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00000400,
+  SQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001000,
+  UQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001400,
+  SQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001800,
+  UQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001C00
+};
+
+enum SVEIntCompareScalarCountAndLimitOp {
+  SVEIntCompareScalarCountAndLimitFixed = 0x25200000,
+  SVEIntCompareScalarCountAndLimitFMask = 0xFF20E000,
+  SVEIntCompareScalarCountAndLimitMask = 0xFF20EC10,
+  WHILELT_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000400,
+  WHILELE_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000410,
+  WHILELO_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C00,
+  WHILELS_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C10
+};
+
+enum SVEIntCompareSignedImmOp {
+  SVEIntCompareSignedImmFixed = 0x25000000,
+  SVEIntCompareSignedImmFMask = 0xFF204000,
+  SVEIntCompareSignedImmMask = 0xFF20E010,
+  CMPGE_p_p_zi = SVEIntCompareSignedImmFixed,
+  CMPGT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00000010,
+  CMPLT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002000,
+  CMPLE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002010,
+  CMPEQ_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008000,
+  CMPNE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008010
+};
+
+enum SVEIntCompareUnsignedImmOp {
+  SVEIntCompareUnsignedImmFixed = 0x24200000,
+  SVEIntCompareUnsignedImmFMask = 0xFF200000,
+  SVEIntCompareUnsignedImmMask = 0xFF202010,
+  CMPHS_p_p_zi = SVEIntCompareUnsignedImmFixed,
+  CMPHI_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00000010,
+  CMPLO_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002000,
+  CMPLS_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002010
+};
+
+enum SVEIntCompareVectorsOp {
+  SVEIntCompareVectorsFixed = 0x24000000,
+  SVEIntCompareVectorsFMask = 0xFF200000,
+  SVEIntCompareVectorsMask = 0xFF20E010,
+  CMPHS_p_p_zz = SVEIntCompareVectorsFixed,
+  CMPHI_p_p_zz = SVEIntCompareVectorsFixed | 0x00000010,
+  CMPEQ_p_p_zw = SVEIntCompareVectorsFixed | 0x00002000,
+  CMPNE_p_p_zw = SVEIntCompareVectorsFixed | 0x00002010,
+  CMPGE_p_p_zw = SVEIntCompareVectorsFixed | 0x00004000,
+  CMPGT_p_p_zw = SVEIntCompareVectorsFixed | 0x00004010,
+  CMPLT_p_p_zw = SVEIntCompareVectorsFixed | 0x00006000,
+  CMPLE_p_p_zw = SVEIntCompareVectorsFixed | 0x00006010,
+  CMPGE_p_p_zz = SVEIntCompareVectorsFixed | 0x00008000,
+  CMPGT_p_p_zz = SVEIntCompareVectorsFixed | 0x00008010,
+  CMPEQ_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A000,
+  CMPNE_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A010,
+  CMPHS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C000,
+  CMPHI_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C010,
+  CMPLO_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E000,
+  CMPLS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E010
+};
+
+enum SVEIntConvertToFPOp {
+  SVEIntConvertToFPFixed = 0x6510A000,
+  SVEIntConvertToFPFMask = 0xFF38E000,
+  SVEIntConvertToFPMask = 0xFFFFE000,
+  SCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00420000,
+  UCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00430000,
+  SCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00440000,
+  UCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00450000,
+  SCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00460000,
+  UCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00470000,
+  SCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00840000,
+  UCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00850000,
+  SCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C00000,
+  UCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C10000,
+  SCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C40000,
+  UCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C50000,
+  SCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C60000,
+  UCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C70000
+};
+
+enum SVEIntDivideVectors_PredicatedOp {
+  SVEIntDivideVectors_PredicatedFixed = 0x04140000,
+  SVEIntDivideVectors_PredicatedFMask = 0xFF3CE000,
+  SVEIntDivideVectors_PredicatedMask = 0xFF3FE000,
+  SDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed,
+  UDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00010000,
+  SDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00020000,
+  UDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEIntMinMaxDifference_PredicatedOp {
+  SVEIntMinMaxDifference_PredicatedFixed = 0x04080000,
+  SVEIntMinMaxDifference_PredicatedFMask = 0xFF38E000,
+  SVEIntMinMaxDifference_PredicatedMask = 0xFF3FE000,
+  SMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed,
+  UMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00010000,
+  SMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00020000,
+  UMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00030000,
+  SABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00040000,
+  UABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00050000
+};
+
+enum SVEIntMinMaxImm_UnpredicatedOp {
+  SVEIntMinMaxImm_UnpredicatedFixed = 0x2528C000,
+  SVEIntMinMaxImm_UnpredicatedFMask = 0xFF38C000,
+  SVEIntMinMaxImm_UnpredicatedMask = 0xFF3FE000,
+  SMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed,
+  UMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00010000,
+  SMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00020000,
+  UMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00030000
+};
+
+enum SVEIntMulAddPredicatedOp {
+  SVEIntMulAddPredicatedFixed = 0x04004000,
+  SVEIntMulAddPredicatedFMask = 0xFF204000,
+  SVEIntMulAddPredicatedMask = 0xFF20E000,
+  MLA_z_p_zzz = SVEIntMulAddPredicatedFixed,
+  MLS_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00002000,
+  MAD_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00008000,
+  MSB_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x0000A000
+};
+
+enum SVEIntMulAddUnpredicatedOp {
+  SVEIntMulAddUnpredicatedFixed = 0x44000000,
+  SVEIntMulAddUnpredicatedFMask = 0xFF208000,
+  SVEIntMulAddUnpredicatedMask = 0xFF20FC00,
+  SDOT_z_zzz = SVEIntMulAddUnpredicatedFixed,
+  UDOT_z_zzz = SVEIntMulAddUnpredicatedFixed | 0x00000400
+};
+
+enum SVEIntMulImm_UnpredicatedOp {
+  SVEIntMulImm_UnpredicatedFixed = 0x2530C000,
+  SVEIntMulImm_UnpredicatedFMask = 0xFF38C000,
+  SVEIntMulImm_UnpredicatedMask = 0xFF3FE000,
+  MUL_z_zi = SVEIntMulImm_UnpredicatedFixed
+};
+
+enum SVEIntMulVectors_PredicatedOp {
+  SVEIntMulVectors_PredicatedFixed = 0x04100000,
+  SVEIntMulVectors_PredicatedFMask = 0xFF3CE000,
+  SVEIntMulVectors_PredicatedMask = 0xFF3FE000,
+  MUL_z_p_zz = SVEIntMulVectors_PredicatedFixed,
+  SMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00020000,
+  UMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00030000
+};
+
+enum SVEMovprfxOp {
+  SVEMovprfxFixed = 0x04002000,
+  SVEMovprfxFMask = 0xFF20E000,
+  SVEMovprfxMask = 0xFF3EE000,
+  MOVPRFX_z_p_z = SVEMovprfxFixed | 0x00100000
+};
+
+enum SVEIntReductionOp {
+  SVEIntReductionFixed = 0x04002000,
+  SVEIntReductionFMask = 0xFF20E000,
+  SVEIntReductionMask = 0xFF3FE000,
+  SADDV_r_p_z = SVEIntReductionFixed,
+  UADDV_r_p_z = SVEIntReductionFixed | 0x00010000,
+  SMAXV_r_p_z = SVEIntReductionFixed | 0x00080000,
+  UMAXV_r_p_z = SVEIntReductionFixed | 0x00090000,
+  SMINV_r_p_z = SVEIntReductionFixed | 0x000A0000,
+  UMINV_r_p_z = SVEIntReductionFixed | 0x000B0000
+};
+
+enum SVEIntReductionLogicalOp {
+  SVEIntReductionLogicalFixed = 0x04182000,
+  SVEIntReductionLogicalFMask = 0xFF38E000,
+  SVEIntReductionLogicalMask = 0xFF3FE000,
+  ORV_r_p_z = SVEIntReductionLogicalFixed | 0x00180000,
+  EORV_r_p_z = SVEIntReductionLogicalFixed | 0x00190000,
+  ANDV_r_p_z = SVEIntReductionLogicalFixed | 0x001A0000
+};
+
+enum SVEIntUnaryArithmeticPredicatedOp {
+  SVEIntUnaryArithmeticPredicatedFixed = 0x0400A000,
+  SVEIntUnaryArithmeticPredicatedFMask = 0xFF20E000,
+  SVEIntUnaryArithmeticPredicatedMask = 0xFF3FE000,
+  SXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00100000,
+  UXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00110000,
+  SXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00120000,
+  UXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00130000,
+  SXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00140000,
+  UXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00150000,
+  ABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00160000,
+  NEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00170000,
+  CLS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00180000,
+  CLZ_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00190000,
+  CNT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001A0000,
+  CNOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001B0000,
+  FABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001C0000,
+  FNEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001D0000,
+  NOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001E0000
+};
+
+enum SVELoadAndBroadcastElementOp {
+  SVELoadAndBroadcastElementFixed = 0x84408000,
+  SVELoadAndBroadcastElementFMask = 0xFE408000,
+  SVELoadAndBroadcastElementMask = 0xFFC0E000,
+  LD1RB_z_p_bi_u8 = SVELoadAndBroadcastElementFixed,
+  LD1RB_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00002000,
+  LD1RB_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00004000,
+  LD1RB_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00006000,
+  LD1RSW_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x00800000,
+  LD1RH_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00802000,
+  LD1RH_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00804000,
+  LD1RH_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00806000,
+  LD1RSH_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01000000,
+  LD1RSH_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01002000,
+  LD1RW_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x01004000,
+  LD1RW_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01006000,
+  LD1RSB_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01800000,
+  LD1RSB_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01802000,
+  LD1RSB_z_p_bi_s16 = SVELoadAndBroadcastElementFixed | 0x01804000,
+  LD1RD_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01806000
+};
+
+enum SVELoadAndBroadcastQuadword_ScalarPlusImmOp {
+  SVELoadAndBroadcastQuadword_ScalarPlusImmFixed = 0xA4002000,
+  SVELoadAndBroadcastQuadword_ScalarPlusImmFMask = 0xFE10E000,
+  SVELoadAndBroadcastQuadword_ScalarPlusImmMask = 0xFFF0E000,
+  LD1RQB_z_p_bi_u8 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed,
+  LD1RQH_z_p_bi_u16 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x00800000,
+  LD1RQW_z_p_bi_u32 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01000000,
+  LD1RQD_z_p_bi_u64 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01800000
+};
+
+enum SVELoadAndBroadcastQuadword_ScalarPlusScalarOp {
+  SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed = 0xA4000000,
+  SVELoadAndBroadcastQuadword_ScalarPlusScalarFMask = 0xFE00E000,
+  SVELoadAndBroadcastQuadword_ScalarPlusScalarMask = 0xFFE0E000,
+  LD1RQB_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed,
+  LD1RQH_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x00800000,
+  LD1RQW_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01000000,
+  LD1RQD_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01800000
+};
+
+enum SVELoadMultipleStructures_ScalarPlusImmOp {
+  SVELoadMultipleStructures_ScalarPlusImmFixed = 0xA400E000,
+  SVELoadMultipleStructures_ScalarPlusImmFMask = 0xFE10E000,
+  SVELoadMultipleStructures_ScalarPlusImmMask = 0xFFF0E000,
+  LD2B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00200000,
+  LD3B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00400000,
+  LD4B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00600000,
+  LD2H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00A00000,
+  LD3H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00C00000,
+  LD4H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00E00000,
+  LD2W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01200000,
+  LD3W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01400000,
+  LD4W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01600000,
+  LD2D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01A00000,
+  LD3D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01C00000,
+  LD4D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVELoadMultipleStructures_ScalarPlusScalarOp {
+  SVELoadMultipleStructures_ScalarPlusScalarFixed = 0xA400C000,
+  SVELoadMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000,
+  SVELoadMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000,
+  LD2B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00200000,
+  LD3B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00400000,
+  LD4B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00600000,
+  LD2H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00A00000,
+  LD3H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00C00000,
+  LD4H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00E00000,
+  LD2W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01200000,
+  LD3W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01400000,
+  LD4W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01600000,
+  LD2D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01A00000,
+  LD3D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01C00000,
+  LD4D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVELoadPredicateRegisterOp {
+  SVELoadPredicateRegisterFixed = 0x85800000,
+  SVELoadPredicateRegisterFMask = 0xFFC0E010,
+  SVELoadPredicateRegisterMask = 0xFFC0E010,
+  LDR_p_bi = SVELoadPredicateRegisterFixed
+};
+
+enum SVELoadVectorRegisterOp {
+  SVELoadVectorRegisterFixed = 0x85804000,
+  SVELoadVectorRegisterFMask = 0xFFC0E000,
+  SVELoadVectorRegisterMask = 0xFFC0E000,
+  LDR_z_bi = SVELoadVectorRegisterFixed
+};
+
+enum SVEMulIndexOp {
+  SVEMulIndexFixed = 0x44200000,
+  SVEMulIndexFMask = 0xFF200000,
+  SVEMulIndexMask = 0xFFE0FC00,
+  SDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800000,
+  UDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800400,
+  SDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00000,
+  UDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00400
+};
+
+enum SVEPartitionBreakConditionOp {
+  SVEPartitionBreakConditionFixed = 0x25104000,
+  SVEPartitionBreakConditionFMask = 0xFF3FC200,
+  SVEPartitionBreakConditionMask = 0xFFFFC200,
+  BRKA_p_p_p = SVEPartitionBreakConditionFixed,
+  BRKAS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00400000,
+  BRKB_p_p_p = SVEPartitionBreakConditionFixed | 0x00800000,
+  BRKBS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00C00000
+};
+
+enum SVEPermutePredicateElementsOp {
+  SVEPermutePredicateElementsFixed = 0x05204000,
+  SVEPermutePredicateElementsFMask = 0xFF30E210,
+  SVEPermutePredicateElementsMask = 0xFF30FE10,
+  ZIP1_p_pp = SVEPermutePredicateElementsFixed,
+  ZIP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000400,
+  UZP1_p_pp = SVEPermutePredicateElementsFixed | 0x00000800,
+  UZP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000C00,
+  TRN1_p_pp = SVEPermutePredicateElementsFixed | 0x00001000,
+  TRN2_p_pp = SVEPermutePredicateElementsFixed | 0x00001400
+};
+
+enum SVEPermuteVectorExtractOp {
+  SVEPermuteVectorExtractFixed = 0x05200000,
+  SVEPermuteVectorExtractFMask = 0xFF20E000,
+  SVEPermuteVectorExtractMask = 0xFFE0E000,
+  EXT_z_zi_des = SVEPermuteVectorExtractFixed
+};
+
+enum SVEPermuteVectorInterleavingOp {
+  SVEPermuteVectorInterleavingFixed = 0x05206000,
+  SVEPermuteVectorInterleavingFMask = 0xFF20E000,
+  SVEPermuteVectorInterleavingMask = 0xFF20FC00,
+  ZIP1_z_zz = SVEPermuteVectorInterleavingFixed,
+  ZIP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000400,
+  UZP1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000800,
+  UZP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000C00,
+  TRN1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001000,
+  TRN2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001400
+};
+
+enum SVEPredicateCountOp {
+  SVEPredicateCountFixed = 0x25208000,
+  SVEPredicateCountFMask = 0xFF38C000,
+  SVEPredicateCountMask = 0xFF3FC200,
+  CNTP_r_p_p = SVEPredicateCountFixed
+};
+
+enum SVEPredicateFirstActiveOp {
+  SVEPredicateFirstActiveFixed = 0x2518C000,
+  SVEPredicateFirstActiveFMask = 0xFF3FFE10,
+  SVEPredicateFirstActiveMask = 0xFFFFFE10,
+  PFIRST_p_p_p = SVEPredicateFirstActiveFixed | 0x00400000
+};
+
+enum SVEPredicateInitializeOp {
+  SVEPredicateInitializeFixed = 0x2518E000,
+  SVEPredicateInitializeFMask = 0xFF3EFC10,
+  SVEPredicateInitializeMask = 0xFF3FFC10,
+  SVEPredicateInitializeSetFlagsBit = 0x00010000,
+  PTRUE_p_s = SVEPredicateInitializeFixed | 0x00000000,
+  PTRUES_p_s = SVEPredicateInitializeFixed | SVEPredicateInitializeSetFlagsBit
+};
+
+enum SVEPredicateLogicalOp {
+  SVEPredicateLogicalFixed = 0x25004000,
+  SVEPredicateLogicalFMask = 0xFF30C000,
+  SVEPredicateLogicalMask = 0xFFF0C210,
+  SVEPredicateLogicalSetFlagsBit = 0x00400000,
+  AND_p_p_pp_z = SVEPredicateLogicalFixed,
+  ANDS_p_p_pp_z = AND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  BIC_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000010,
+  BICS_p_p_pp_z = BIC_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  EOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000200,
+  EORS_p_p_pp_z = EOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  ORR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800000,
+  ORRS_p_p_pp_z = ORR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  ORN_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800010,
+  ORNS_p_p_pp_z = ORN_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  NAND_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800210,
+  NANDS_p_p_pp_z = NAND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  NOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800200,
+  NORS_p_p_pp_z = NOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit,
+  SEL_p_p_pp = SVEPredicateLogicalFixed | 0x00000210
+};
+
+enum SVEPredicateNextActiveOp {
+  SVEPredicateNextActiveFixed = 0x2519C400,
+  SVEPredicateNextActiveFMask = 0xFF3FFE10,
+  SVEPredicateNextActiveMask = 0xFF3FFE10,
+  PNEXT_p_p_p = SVEPredicateNextActiveFixed
+};
+
+enum SVEPredicateReadFromFFR_PredicatedOp {
+  SVEPredicateReadFromFFR_PredicatedFixed = 0x2518F000,
+  SVEPredicateReadFromFFR_PredicatedFMask = 0xFF3FFE10,
+  SVEPredicateReadFromFFR_PredicatedMask = 0xFFFFFE10,
+  RDFFR_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed,
+  RDFFRS_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed | 0x00400000
+};
+
+enum SVEPredicateReadFromFFR_UnpredicatedOp {
+  SVEPredicateReadFromFFR_UnpredicatedFixed = 0x2519F000,
+  SVEPredicateReadFromFFR_UnpredicatedFMask = 0xFF3FFFF0,
+  SVEPredicateReadFromFFR_UnpredicatedMask = 0xFFFFFFF0,
+  RDFFR_p_f = SVEPredicateReadFromFFR_UnpredicatedFixed
+};
+
+enum SVEPredicateTestOp {
+  SVEPredicateTestFixed = 0x2510C000,
+  SVEPredicateTestFMask = 0xFF3FC210,
+  SVEPredicateTestMask = 0xFFFFC21F,
+  PTEST_p_p = SVEPredicateTestFixed | 0x00400000
+};
+
+enum SVEPredicateZeroOp {
+  SVEPredicateZeroFixed = 0x2518E400,
+  SVEPredicateZeroFMask = 0xFF3FFFF0,
+  SVEPredicateZeroMask = 0xFFFFFFF0,
+  PFALSE_p = SVEPredicateZeroFixed
+};
+
+enum SVEPropagateBreakOp {
+  SVEPropagateBreakFixed = 0x2500C000,
+  SVEPropagateBreakFMask = 0xFF30C000,
+  SVEPropagateBreakMask = 0xFFF0C210,
+  BRKPA_p_p_pp = SVEPropagateBreakFixed,
+  BRKPB_p_p_pp = SVEPropagateBreakFixed | 0x00000010,
+  BRKPAS_p_p_pp = SVEPropagateBreakFixed | 0x00400000,
+  BRKPBS_p_p_pp = SVEPropagateBreakFixed | 0x00400010
+};
+
+enum SVEPropagateBreakToNextPartitionOp {
+  SVEPropagateBreakToNextPartitionFixed = 0x25184000,
+  SVEPropagateBreakToNextPartitionFMask = 0xFFBFC210,
+  SVEPropagateBreakToNextPartitionMask = 0xFFFFC210,
+  BRKN_p_p_pp = SVEPropagateBreakToNextPartitionFixed,
+  BRKNS_p_p_pp = SVEPropagateBreakToNextPartitionFixed | 0x00400000
+};
+
+enum SVEReversePredicateElementsOp {
+  SVEReversePredicateElementsFixed = 0x05344000,
+  SVEReversePredicateElementsFMask = 0xFF3FFE10,
+  SVEReversePredicateElementsMask = 0xFF3FFE10,
+  REV_p_p = SVEReversePredicateElementsFixed
+};
+
+enum SVEReverseVectorElementsOp {
+  SVEReverseVectorElementsFixed = 0x05383800,
+  SVEReverseVectorElementsFMask = 0xFF3FFC00,
+  SVEReverseVectorElementsMask = 0xFF3FFC00,
+  REV_z_z = SVEReverseVectorElementsFixed
+};
+
+enum SVEReverseWithinElementsOp {
+  SVEReverseWithinElementsFixed = 0x05248000,
+  SVEReverseWithinElementsFMask = 0xFF3CE000,
+  SVEReverseWithinElementsMask = 0xFF3FE000,
+  REVB_z_z = SVEReverseWithinElementsFixed,
+  REVH_z_z = SVEReverseWithinElementsFixed | 0x00010000,
+  REVW_z_z = SVEReverseWithinElementsFixed | 0x00020000,
+  RBIT_z_p_z = SVEReverseWithinElementsFixed | 0x00030000
+};
+
+enum SVESaturatingIncDecRegisterByElementCountOp {
+  SVESaturatingIncDecRegisterByElementCountFixed = 0x0420F000,
+  SVESaturatingIncDecRegisterByElementCountFMask = 0xFF20F000,
+  SVESaturatingIncDecRegisterByElementCountMask = 0xFFF0FC00,
+  SQINCB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed,
+  UQINCB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000400,
+  SQDECB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000800,
+  UQDECB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000C00,
+  SQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100000,
+  UQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100400,
+  SQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100800,
+  UQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100C00,
+  SQINCH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400000,
+  UQINCH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400400,
+  SQDECH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400800,
+  UQDECH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400C00,
+  SQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500000,
+  UQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500400,
+  SQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500800,
+  UQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500C00,
+  SQINCW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800000,
+  UQINCW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800400,
+  SQDECW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800800,
+  UQDECW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800C00,
+  SQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900000,
+  UQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900400,
+  SQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900800,
+  UQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900C00,
+  SQINCD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00000,
+  UQINCD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00400,
+  SQDECD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00800,
+  UQDECD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00C00,
+  SQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00000,
+  UQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00400,
+  SQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00800,
+  UQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00C00
+};
+
+enum SVESaturatingIncDecVectorByElementCountOp {
+  SVESaturatingIncDecVectorByElementCountFixed = 0x0420C000,
+  SVESaturatingIncDecVectorByElementCountFMask = 0xFF30F000,
+  SVESaturatingIncDecVectorByElementCountMask = 0xFFF0FC00,
+  SQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400000,
+  UQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400400,
+  SQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400800,
+  UQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400C00,
+  SQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800000,
+  UQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800400,
+  SQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800800,
+  UQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800C00,
+  SQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00000,
+  UQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00400,
+  SQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00800,
+  UQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00C00
+};
+
+enum SVEStackFrameAdjustmentOp {
+  SVEStackFrameAdjustmentFixed = 0x04205000,
+  SVEStackFrameAdjustmentFMask = 0xFFA0F800,
+  SVEStackFrameAdjustmentMask = 0xFFE0F800,
+  ADDVL_r_ri = SVEStackFrameAdjustmentFixed,
+  ADDPL_r_ri = SVEStackFrameAdjustmentFixed | 0x00400000
+};
+
+enum SVEStackFrameSizeOp {
+  SVEStackFrameSizeFixed = 0x04BF5000,
+  SVEStackFrameSizeFMask = 0xFFFFF800,
+  SVEStackFrameSizeMask = 0xFFFFF800,
+  RDVL_r_i = SVEStackFrameSizeFixed
+};
+
+enum SVEStoreMultipleStructures_ScalarPlusImmOp {
+  SVEStoreMultipleStructures_ScalarPlusImmFixed = 0xE410E000,
+  SVEStoreMultipleStructures_ScalarPlusImmFMask = 0xFE10E000,
+  SVEStoreMultipleStructures_ScalarPlusImmMask = 0xFFF0E000,
+  ST2B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00200000,
+  ST3B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00400000,
+  ST4B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00600000,
+  ST2H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00A00000,
+  ST3H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00C00000,
+  ST4H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00E00000,
+  ST2W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01200000,
+  ST3W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01400000,
+  ST4W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01600000,
+  ST2D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01A00000,
+  ST3D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01C00000,
+  ST4D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01E00000
+};
+
+enum SVEStoreMultipleStructures_ScalarPlusScalarOp {
+  SVEStoreMultipleStructures_ScalarPlusScalarFixed = 0xE4006000,
+  SVEStoreMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000,
+  SVEStoreMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000,
+  ST2B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00200000,
+  ST3B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00400000,
+  ST4B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00600000,
+  ST2H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00A00000,
+  ST3H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00C00000,
+  ST4H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00E00000,
+  ST2W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01200000,
+  ST3W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01400000,
+  ST4W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01600000,
+  ST2D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01A00000,
+  ST3D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01C00000,
+  ST4D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01E00000
+};
+
+enum SVEStorePredicateRegisterOp {
+  SVEStorePredicateRegisterFixed = 0xE5800000,
+  SVEStorePredicateRegisterFMask = 0xFFC0E010,
+  SVEStorePredicateRegisterMask = 0xFFC0E010,
+  STR_p_bi = SVEStorePredicateRegisterFixed
+};
+
+enum SVEStoreVectorRegisterOp {
+  SVEStoreVectorRegisterFixed = 0xE5804000,
+  SVEStoreVectorRegisterFMask = 0xFFC0E000,
+  SVEStoreVectorRegisterMask = 0xFFC0E000,
+  STR_z_bi = SVEStoreVectorRegisterFixed
+};
+
+enum SVETableLookupOp {
+  SVETableLookupFixed = 0x05203000,
+  SVETableLookupFMask = 0xFF20FC00,
+  SVETableLookupMask = 0xFF20FC00,
+  TBL_z_zz_1 = SVETableLookupFixed
+};
+
+enum SVEUnpackPredicateElementsOp {
+  SVEUnpackPredicateElementsFixed = 0x05304000,
+  SVEUnpackPredicateElementsFMask = 0xFFFEFE10,
+  SVEUnpackPredicateElementsMask = 0xFFFFFE10,
+  PUNPKLO_p_p = SVEUnpackPredicateElementsFixed,
+  PUNPKHI_p_p = SVEUnpackPredicateElementsFixed | 0x00010000
+};
+
+enum SVEUnpackVectorElementsOp {
+  SVEUnpackVectorElementsFixed = 0x05303800,
+  SVEUnpackVectorElementsFMask = 0xFF3CFC00,
+  SVEUnpackVectorElementsMask = 0xFF3FFC00,
+  SUNPKLO_z_z = SVEUnpackVectorElementsFixed,
+  SUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00010000,
+  UUNPKLO_z_z = SVEUnpackVectorElementsFixed | 0x00020000,
+  UUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00030000
+};
+
+enum SVEVectorSelectOp {
+  SVEVectorSelectFixed = 0x0520C000,
+  SVEVectorSelectFMask = 0xFF20C000,
+  SVEVectorSelectMask = 0xFF20C000,
+  SEL_z_p_zz = SVEVectorSelectFixed
+};
+
+enum SVEVectorSplice_DestructiveOp {
+  SVEVectorSplice_DestructiveFixed = 0x052C8000,
+  SVEVectorSplice_DestructiveFMask = 0xFF3FE000,
+  SVEVectorSplice_DestructiveMask = 0xFF3FE000,
+  SPLICE_z_p_zz_des = SVEVectorSplice_DestructiveFixed
+};
+
 enum ReservedOp {
   ReservedFixed = 0x00000000,
   ReservedFMask = 0x1E000000,
   ReservedMask = 0xFFFF0000,
-  UDF = ReservedFixed | 0x00000000
 };
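The Fixed/FMask/Mask triples drive a two-step match: (instr & FMask) == Fixed decides whether a word belongs to a group at all, and instr & Mask, compared against the named values, identifies the individual instruction. A minimal illustration of that convention, using the SVEBitwiseLogicalUnpredicatedOp constants above (the helper itself is hypothetical, not VIXL's actual decoder):

#include <cstdint>

// Hypothetical classifier; the constants are copied from the enum above,
// the function is just a sketch of the Fixed/FMask/Mask convention.
const char* ClassifyLogicalUnpredicated(uint32_t instr) {
  if ((instr & 0xFF20E000) != 0x04202000) return "not in this group";  // FMask/Fixed.
  switch (instr & 0xFFE0FC00) {  // Mask isolates the per-instruction bits.
    case 0x04203000: return "and";  // AND_z_zz = Fixed | 0x00001000
    case 0x04603000: return "orr";  // ORR_z_zz = Fixed | 0x00401000
    case 0x04A03000: return "eor";  // EOR_z_zz = Fixed | 0x00801000
    case 0x04E03000: return "bic";  // BIC_z_zz = Fixed | 0x00C01000
    default: return "unallocated";
  }
}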
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index f5e4fca5..a31e010d 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -39,10 +39,15 @@ namespace aarch64 {
 const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
 const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kRAS(28);
 const IDRegister::Field AA64PFR0::kSVE(32);
 const IDRegister::Field AA64PFR0::kDIT(48);
+const IDRegister::Field AA64PFR0::kCSV2(56);
+const IDRegister::Field AA64PFR0::kCSV3(60);

 const IDRegister::Field AA64PFR1::kBT(0);
+const IDRegister::Field AA64PFR1::kSSBS(4);
+const IDRegister::Field AA64PFR1::kMTE(8);

 const IDRegister::Field AA64ISAR0::kAES(4);
 const IDRegister::Field AA64ISAR0::kSHA1(8);
@@ -56,6 +61,7 @@ const IDRegister::Field AA64ISAR0::kSM4(40);
 const IDRegister::Field AA64ISAR0::kDP(44);
 const IDRegister::Field AA64ISAR0::kFHM(48);
 const IDRegister::Field AA64ISAR0::kTS(52);
+const IDRegister::Field AA64ISAR0::kRNDR(60);

 const IDRegister::Field AA64ISAR1::kDPB(0);
 const IDRegister::Field AA64ISAR1::kAPA(4);
@@ -68,23 +74,41 @@ const IDRegister::Field AA64ISAR1::kGPI(28);
 const IDRegister::Field AA64ISAR1::kFRINTTS(32);
 const IDRegister::Field AA64ISAR1::kSB(36);
 const IDRegister::Field AA64ISAR1::kSPECRES(40);
+const IDRegister::Field AA64ISAR1::kBF16(44);
+const IDRegister::Field AA64ISAR1::kDGH(48);
+const IDRegister::Field AA64ISAR1::kI8MM(52);

 const IDRegister::Field AA64MMFR1::kLO(16);

+const IDRegister::Field AA64MMFR2::kAT(32);
+
+const IDRegister::Field AA64ZFR0::kBF16(20);
+const IDRegister::Field AA64ZFR0::kI8MM(44);
+const IDRegister::Field AA64ZFR0::kF32MM(52);
+const IDRegister::Field AA64ZFR0::kF64MM(56);
+
 CPUFeatures AA64PFR0::GetCPUFeatures() const {
   CPUFeatures f;
   if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
   if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
   if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
   if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
   if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
   if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
+  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
+  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
+  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
   return f;
 }

 CPUFeatures AA64PFR1::GetCPUFeatures() const {
   CPUFeatures f;
   if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
+  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
+  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
+  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
+  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
   return f;
 }
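The Field::kSigned fields explain the ">= 0" tests above: Arm ID-register fields are four bits wide, and signed fields use 0xF (that is, -1) to mean "not implemented". A sketch of the extraction, where the four-bit width and sign convention are assumptions based on the Arm ID scheme rather than something stated in this diff:

#include <cstdint>

// Sketch of signed/unsigned ID field extraction.
int GetIdField(uint64_t reg, int lsb, bool is_signed) {
  int bits = static_cast<int>((reg >> lsb) & 0xF);
  if (is_signed && ((bits & 0x8) != 0)) bits -= 16;  // Sign-extend from 4 bits.
  return bits;
}
// For AA64PFR0.FP (lsb 16, signed): -1 means no FP, 0 means FP, 1 means
// FP plus FP16, which is why the code tests Get(kFP) >= 0 rather than >= 1.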
>= 1) f.Combine(CPUFeatures::kMTEInstructions); + if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE); return f; } @@ -105,20 +129,38 @@ CPUFeatures AA64ISAR0::GetCPUFeatures() const { if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM); if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM); if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag); + if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG); return f; } CPUFeatures AA64ISAR1::GetCPUFeatures() const { CPUFeatures f; if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP); + if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP); if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT); if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma); if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc); if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm); if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt); + if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB); + if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES); + if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16); + if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH); + if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM); + + // Only one of these fields should be non-zero, but they have the same + // encodings, so merge the logic. + int apx = std::max(Get(kAPI), Get(kAPA)); + if (apx >= 1) { + f.Combine(CPUFeatures::kPAuth); + // APA (rather than API) indicates QARMA. + if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA); + if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC); + if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2); + if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC); + if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined); + } - if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth); - if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA); if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric); if (Get(kGPA) >= 1) { f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA); @@ -132,6 +174,23 @@ CPUFeatures AA64MMFR1::GetCPUFeatures() const { return f; } +CPUFeatures AA64MMFR2::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT); + return f; +} + +CPUFeatures AA64ZFR0::GetCPUFeatures() const { + // This register is only available with SVE, but reads-as-zero in its absence, + // so it's always safe to read it. + CPUFeatures f; + if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM); + if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM); + if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM); + if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16); + return f; +} + int IDRegister::Get(IDRegister::Field field) const { int msb = field.GetMsb(); int lsb = field.GetLsb(); @@ -149,7 +208,8 @@ int IDRegister::Get(IDRegister::Field field) const { CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() { CPUFeatures f; -#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures()); +#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \ + f.Combine(Read##NAME().GetCPUFeatures()); VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG) #undef VIXL_COMBINE_ID_REG return f; @@ -163,49 +223,73 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather // than explicit bits, but explicit bits allow us to identify features that // the toolchain doesn't know about. - static const CPUFeatures::Feature kFeatureBits[] = { - // Bits 0-7 - CPUFeatures::kFP, - CPUFeatures::kNEON, - CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track. 
- CPUFeatures::kAES, - CPUFeatures::kPmull1Q, - CPUFeatures::kSHA1, - CPUFeatures::kSHA2, - CPUFeatures::kCRC32, - // Bits 8-15 - CPUFeatures::kAtomics, - CPUFeatures::kFPHalf, - CPUFeatures::kNEONHalf, - CPUFeatures::kIDRegisterEmulation, - CPUFeatures::kRDM, - CPUFeatures::kJSCVT, - CPUFeatures::kFcma, - CPUFeatures::kRCpc, - // Bits 16-23 - CPUFeatures::kDCPoP, - CPUFeatures::kSHA3, - CPUFeatures::kSM3, - CPUFeatures::kSM4, - CPUFeatures::kDotProduct, - CPUFeatures::kSHA512, - CPUFeatures::kSVE, - CPUFeatures::kFHM, - // Bits 24-27 - CPUFeatures::kDIT, - CPUFeatures::kUSCAT, - CPUFeatures::kRCpcImm, - CPUFeatures::kFlagM - // Bits 28-31 are unassigned. - }; - static const size_t kFeatureBitCount = - sizeof(kFeatureBits) / sizeof(kFeatureBits[0]); - - unsigned long auxv = getauxval(AT_HWCAP); // NOLINT(runtime/int) - - VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte)); - for (size_t i = 0; i < kFeatureBitCount; i++) { - if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]); + static const CPUFeatures::Feature kFeatureBits[] = + {// Bits 0-7 + CPUFeatures::kFP, + CPUFeatures::kNEON, + CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track. + CPUFeatures::kAES, + CPUFeatures::kPmull1Q, + CPUFeatures::kSHA1, + CPUFeatures::kSHA2, + CPUFeatures::kCRC32, + // Bits 8-15 + CPUFeatures::kAtomics, + CPUFeatures::kFPHalf, + CPUFeatures::kNEONHalf, + CPUFeatures::kIDRegisterEmulation, + CPUFeatures::kRDM, + CPUFeatures::kJSCVT, + CPUFeatures::kFcma, + CPUFeatures::kRCpc, + // Bits 16-23 + CPUFeatures::kDCPoP, + CPUFeatures::kSHA3, + CPUFeatures::kSM3, + CPUFeatures::kSM4, + CPUFeatures::kDotProduct, + CPUFeatures::kSHA512, + CPUFeatures::kSVE, + CPUFeatures::kFHM, + // Bits 24-31 + CPUFeatures::kDIT, + CPUFeatures::kUSCAT, + CPUFeatures::kRCpcImm, + CPUFeatures::kFlagM, + CPUFeatures::kSSBSControl, + CPUFeatures::kSB, + CPUFeatures::kPAuth, + CPUFeatures::kPAuthGeneric, + // Bits 32-39 + CPUFeatures::kDCCVADP, + CPUFeatures::kNone, // "sve2" + CPUFeatures::kNone, // "sveaes" + CPUFeatures::kNone, // "svepmull" + CPUFeatures::kNone, // "svebitperm" + CPUFeatures::kNone, // "svesha3" + CPUFeatures::kNone, // "svesm4" + CPUFeatures::kFrintToFixedSizedInt, + // Bits 40-47 + CPUFeatures::kSVEI8MM, + CPUFeatures::kSVEF32MM, + CPUFeatures::kSVEF64MM, + CPUFeatures::kSVEBF16, + CPUFeatures::kI8MM, + CPUFeatures::kBF16, + CPUFeatures::kDGH, + CPUFeatures::kRNG, + // Bits 48+ + CPUFeatures::kBTI}; + + uint64_t hwcap_low32 = getauxval(AT_HWCAP); + uint64_t hwcap_high32 = getauxval(AT_HWCAP2); + VIXL_ASSERT(IsUint32(hwcap_low32)); + VIXL_ASSERT(IsUint32(hwcap_high32)); + uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32); + + VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64); + for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) { + if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]); } #endif // VIXL_USE_LINUX_HWCAP @@ -218,17 +302,17 @@ CPUFeatures CPU::InferCPUFeaturesFromOS( #ifdef __aarch64__ -#define VIXL_READ_ID_REG(NAME) \ - NAME CPU::Read##NAME() { \ - uint64_t value = 0; \ - __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \ - return NAME(value); \ +#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ + NAME CPU::Read##NAME() { \ + uint64_t value = 0; \ + __asm__("mrs %0, " MRS_ARG : "=r"(value)); \ + return NAME(value); \ } #else // __aarch64__ -#define VIXL_READ_ID_REG(NAME) \ - NAME CPU::Read##NAME() { \ - /* TODO: Use VIXL_UNREACHABLE once it works in release builds. 
*/ \ - VIXL_ABORT(); \ +#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ + NAME CPU::Read##NAME() { \ + VIXL_UNREACHABLE(); \ + return NAME(0); \ } #endif // __aarch64__ @@ -282,6 +366,27 @@ uint32_t CPU::GetCacheType() { } +// Query the SVE vector length. This requires CPUFeatures::kSVE. +int CPU::ReadSVEVectorLengthInBits() { +#ifdef __aarch64__ + uint64_t vl; + // To support compilers that don't understand `rdvl`, encode the value + // directly and move it manually. + __asm__( + " .word 0x04bf5100\n" // rdvl x0, #8 + " mov %[vl], x0\n" + : [vl] "=r"(vl) + : + : "x0"); + VIXL_ASSERT(vl <= INT_MAX); + return static_cast<int>(vl); +#else + VIXL_UNREACHABLE(); + return 0; +#endif +} + + void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) { #ifdef __aarch64__ // Implement the cache synchronisation for all targets where AArch64 is the diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h index d2b2ee87..2bf1e60f 100644 --- a/src/aarch64/cpu-aarch64.h +++ b/src/aarch64/cpu-aarch64.h @@ -56,7 +56,11 @@ class IDRegister { public: enum Type { kUnsigned, kSigned }; - explicit Field(int lsb, Type type = kUnsigned) : lsb_(lsb), type_(type) {} + // This needs to be constexpr so that fields have "constant initialisation". + // This avoids initialisation order problems when these values are used to + // (dynamically) initialise static variables, etc. + explicit constexpr Field(int lsb, Type type = kUnsigned) + : lsb_(lsb), type_(type) {} static const int kMaxWidthInBits = 4; @@ -92,8 +96,11 @@ class AA64PFR0 : public IDRegister { private: static const Field kFP; static const Field kAdvSIMD; + static const Field kRAS; static const Field kSVE; static const Field kDIT; + static const Field kCSV2; + static const Field kCSV3; }; class AA64PFR1 : public IDRegister { @@ -104,6 +111,8 @@ class AA64PFR1 : public IDRegister { private: static const Field kBT; + static const Field kSSBS; + static const Field kMTE; }; class AA64ISAR0 : public IDRegister { @@ -125,6 +134,7 @@ class AA64ISAR0 : public IDRegister { static const Field kDP; static const Field kFHM; static const Field kTS; + static const Field kRNDR; }; class AA64ISAR1 : public IDRegister { @@ -145,6 +155,9 @@ class AA64ISAR1 : public IDRegister { static const Field kFRINTTS; static const Field kSB; static const Field kSPECRES; + static const Field kBF16; + static const Field kDGH; + static const Field kI8MM; }; class AA64MMFR1 : public IDRegister { @@ -157,6 +170,29 @@ class AA64MMFR1 : public IDRegister { static const Field kLO; }; +class AA64MMFR2 : public IDRegister { + public: + explicit AA64MMFR2(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kAT; +}; + +class AA64ZFR0 : public IDRegister { + public: + explicit AA64ZFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kBF16; + static const Field kI8MM; + static const Field kF32MM; + static const Field kF64MM; +}; + class CPU { public: // Initialise CPU support. @@ -184,6 +220,9 @@ class CPU { CPUFeatures::QueryIDRegistersOption option = CPUFeatures::kQueryIDRegistersIfAvailable); + // Query the SVE vector length. This requires CPUFeatures::kSVE. + static int ReadSVEVectorLengthInBits(); + // Handle tagged pointers. 
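// [Editor's note, not part of the patch.] The tag handled by SetPointerTag
// below is the pointer's top byte, as used by AArch64 top-byte-ignore (TBI)
// addressing: for example, SetPointerTag(ptr, 0x2a) would be expected to
// return ptr with bits 63:56 replaced by 0x2a. The exact field placement is
// assumed from VIXL's usual address-tag handling; the function body itself
// is elided from this hunk.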
template <typename T> static T SetPointerTag(T pointer, uint64_t tag) { @@ -211,14 +250,18 @@ class CPU { } private: -#define VIXL_AARCH64_ID_REG_LIST(V) \ - V(AA64PFR0) \ - V(AA64PFR1) \ - V(AA64ISAR0) \ - V(AA64ISAR1) \ - V(AA64MMFR1) - -#define VIXL_READ_ID_REG(NAME) static NAME Read##NAME(); +#define VIXL_AARCH64_ID_REG_LIST(V) \ + V(AA64PFR0, "ID_AA64PFR0_EL1") \ + V(AA64PFR1, "ID_AA64PFR1_EL1") \ + V(AA64ISAR0, "ID_AA64ISAR0_EL1") \ + V(AA64ISAR1, "ID_AA64ISAR1_EL1") \ + V(AA64MMFR1, "ID_AA64MMFR1_EL1") \ + /* These registers are RES0 in the baseline Armv8.0. We can always safely */ \ + /* read them, but some compilers don't accept the symbolic names. */ \ + V(AA64MMFR2, "S3_0_C0_C7_2") \ + V(AA64ZFR0, "S3_0_C0_C4_4") + +#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME(); // On native AArch64 platforms, read the named CPU ID registers. These require // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64 // platforms. diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc index 474803a1..abe63d39 100644 --- a/src/aarch64/cpu-features-auditor-aarch64.cc +++ b/src/aarch64/cpu-features-auditor-aarch64.cc @@ -870,7 +870,6 @@ void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) { scope.Record(CPUFeatures::kFP); if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf); } - USE(instr); } void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) { @@ -1068,6 +1067,165 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) { USE(instr); } +// Most SVE visitors require only SVE. +#define VIXL_SIMPLE_SVE_VISITOR_LIST(V) \ + V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitGatherLoad_VectorPlusImm) \ + V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_VectorPlusImm) \ + V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitScatterStore_VectorPlusImm) \ + V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_VectorPlusImm) \ + V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_VectorPlusImm) \ + V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitScatterStore_VectorPlusImm) \ + V(SVEAddressGeneration) \ + V(SVEBitwiseLogicalUnpredicated) \ + V(SVEBitwiseShiftUnpredicated) \ + V(SVEFFRInitialise) \ + V(SVEFFRWriteFromPredicate) \ + V(SVEFPAccumulatingReduction) \ + V(SVEFPArithmeticUnpredicated) \ + V(SVEFPCompareVectors) \ + V(SVEFPCompareWithZero) \ + V(SVEFPComplexAddition) \ + V(SVEFPComplexMulAdd) \ + V(SVEFPComplexMulAddIndex) \ + V(SVEFPFastReduction) \ + V(SVEFPMulIndex) \ + V(SVEFPMulAdd) \ + V(SVEFPMulAddIndex) \ + V(SVEFPUnaryOpUnpredicated) \ + V(SVEIncDecByPredicateCount) \ + V(SVEIndexGeneration) \ + V(SVEIntArithmeticUnpredicated) \ + V(SVEIntCompareSignedImm) \ + V(SVEIntCompareUnsignedImm) \ +
V(SVEIntCompareVectors) \ + V(SVEIntMulAddPredicated) \ + V(SVEIntMulAddUnpredicated) \ + V(SVEIntReduction) \ + V(SVEIntUnaryArithmeticPredicated) \ + V(SVEMovprfx) \ + V(SVEMulIndex) \ + V(SVEPermuteVectorExtract) \ + V(SVEPermuteVectorInterleaving) \ + V(SVEPredicateCount) \ + V(SVEPredicateLogical) \ + V(SVEPropagateBreak) \ + V(SVEStackFrameAdjustment) \ + V(SVEStackFrameSize) \ + V(SVEVectorSelect) \ + V(SVEBitwiseLogical_Predicated) \ + V(SVEBitwiseLogicalWithImm_Unpredicated) \ + V(SVEBitwiseShiftByImm_Predicated) \ + V(SVEBitwiseShiftByVector_Predicated) \ + V(SVEBitwiseShiftByWideElements_Predicated) \ + V(SVEBroadcastBitmaskImm) \ + V(SVEBroadcastFPImm_Unpredicated) \ + V(SVEBroadcastGeneralRegister) \ + V(SVEBroadcastIndexElement) \ + V(SVEBroadcastIntImm_Unpredicated) \ + V(SVECompressActiveElements) \ + V(SVEConditionallyBroadcastElementToVector) \ + V(SVEConditionallyExtractElementToSIMDFPScalar) \ + V(SVEConditionallyExtractElementToGeneralRegister) \ + V(SVEConditionallyTerminateScalars) \ + V(SVEConstructivePrefix_Unpredicated) \ + V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ + V(SVEContiguousLoad_ScalarPlusImm) \ + V(SVEContiguousLoad_ScalarPlusScalar) \ + V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ + V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ + V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ + V(SVEContiguousPrefetch_ScalarPlusImm) \ + V(SVEContiguousPrefetch_ScalarPlusScalar) \ + V(SVEContiguousStore_ScalarPlusImm) \ + V(SVEContiguousStore_ScalarPlusScalar) \ + V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ + V(SVECopyFPImm_Predicated) \ + V(SVECopyGeneralRegisterToVector_Predicated) \ + V(SVECopyIntImm_Predicated) \ + V(SVEElementCount) \ + V(SVEExtractElementToSIMDFPScalarRegister) \ + V(SVEExtractElementToGeneralRegister) \ + V(SVEFPArithmetic_Predicated) \ + V(SVEFPArithmeticWithImm_Predicated) \ + V(SVEFPConvertPrecision) \ + V(SVEFPConvertToInt) \ + V(SVEFPExponentialAccelerator) \ + V(SVEFPRoundToIntegralValue) \ + V(SVEFPTrigMulAddCoefficient) \ + V(SVEFPTrigSelectCoefficient) \ + V(SVEFPUnaryOp) \ + V(SVEIncDecRegisterByElementCount) \ + V(SVEIncDecVectorByElementCount) \ + V(SVEInsertSIMDFPScalarRegister) \ + V(SVEInsertGeneralRegister) \ + V(SVEIntAddSubtractImm_Unpredicated) \ + V(SVEIntAddSubtractVectors_Predicated) \ + V(SVEIntCompareScalarCountAndLimit) \ + V(SVEIntConvertToFP) \ + V(SVEIntDivideVectors_Predicated) \ + V(SVEIntMinMaxImm_Unpredicated) \ + V(SVEIntMinMaxDifference_Predicated) \ + V(SVEIntMulImm_Unpredicated) \ + V(SVEIntMulVectors_Predicated) \ + V(SVELoadAndBroadcastElement) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ + V(SVELoadMultipleStructures_ScalarPlusImm) \ + V(SVELoadMultipleStructures_ScalarPlusScalar) \ + V(SVELoadPredicateRegister) \ + V(SVELoadVectorRegister) \ + V(SVEPartitionBreakCondition) \ + V(SVEPermutePredicateElements) \ + V(SVEPredicateFirstActive) \ + V(SVEPredicateInitialize) \ + V(SVEPredicateNextActive) \ + V(SVEPredicateReadFromFFR_Predicated) \ + V(SVEPredicateReadFromFFR_Unpredicated) \ + V(SVEPredicateTest) \ + V(SVEPredicateZero) \ + V(SVEPropagateBreakToNextPartition) \ + V(SVEReversePredicateElements) \ + V(SVEReverseVectorElements) \ + V(SVEReverseWithinElements) \ + V(SVESaturatingIncDecRegisterByElementCount) \ + V(SVESaturatingIncDecVectorByElementCount) \ + V(SVEStoreMultipleStructures_ScalarPlusImm) \ + 
V(SVEStoreMultipleStructures_ScalarPlusScalar) \ + V(SVEStorePredicateRegister) \ + V(SVEStoreVectorRegister) \ + V(SVETableLookup) \ + V(SVEUnpackPredicateElements) \ + V(SVEUnpackVectorElements) \ + V(SVEVectorSplice_Destructive) + +#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \ + void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \ + RecordInstructionFeaturesScope scope(this); \ + scope.Record(CPUFeatures::kSVE); \ + USE(instr); \ + } +VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR) +#undef VIXL_DEFINE_SIMPLE_SVE_VISITOR +#undef VIXL_SIMPLE_SVE_VISITOR_LIST + void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) { RecordInstructionFeaturesScope scope(this); if (instr->Mask(SystemHintFMask) == SystemHintFixed) { diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc index ce1f33fb..c6859bbc 100644 --- a/src/aarch64/decoder-aarch64.cc +++ b/src/aarch64/decoder-aarch64.cc @@ -182,22 +182,45 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) { case M: \ bit_extract_fn = &Instruction::ExtractBits<M>; \ break; + INSTANTIATE_TEMPLATE(0x000001e0); + INSTANTIATE_TEMPLATE(0x00000400); INSTANTIATE_TEMPLATE(0x00000800); INSTANTIATE_TEMPLATE(0x00000c00); + INSTANTIATE_TEMPLATE(0x00001000); + INSTANTIATE_TEMPLATE(0x00001800); INSTANTIATE_TEMPLATE(0x00001c00); INSTANTIATE_TEMPLATE(0x00004000); INSTANTIATE_TEMPLATE(0x00008000); INSTANTIATE_TEMPLATE(0x0000f000); INSTANTIATE_TEMPLATE(0x0000fc00); + INSTANTIATE_TEMPLATE(0x00060010); + INSTANTIATE_TEMPLATE(0x00093e00); + INSTANTIATE_TEMPLATE(0x000c1000); + INSTANTIATE_TEMPLATE(0x00100000); + INSTANTIATE_TEMPLATE(0x00101800); + INSTANTIATE_TEMPLATE(0x00140000); + INSTANTIATE_TEMPLATE(0x00180000); + INSTANTIATE_TEMPLATE(0x00181000); + INSTANTIATE_TEMPLATE(0x00190000); + INSTANTIATE_TEMPLATE(0x00191400); + INSTANTIATE_TEMPLATE(0x001c0000); + INSTANTIATE_TEMPLATE(0x001c1800); INSTANTIATE_TEMPLATE(0x001f0000); INSTANTIATE_TEMPLATE(0x0020fc00); INSTANTIATE_TEMPLATE(0x0038f000); INSTANTIATE_TEMPLATE(0x00400000); + INSTANTIATE_TEMPLATE(0x00400010); INSTANTIATE_TEMPLATE(0x0040f000); + INSTANTIATE_TEMPLATE(0x00500000); INSTANTIATE_TEMPLATE(0x00800000); + INSTANTIATE_TEMPLATE(0x00800010); + INSTANTIATE_TEMPLATE(0x00801800); + INSTANTIATE_TEMPLATE(0x009f0000); INSTANTIATE_TEMPLATE(0x00c00000); + INSTANTIATE_TEMPLATE(0x00c00010); INSTANTIATE_TEMPLATE(0x00cf8000); INSTANTIATE_TEMPLATE(0x00db0000); + INSTANTIATE_TEMPLATE(0x00dc0000); INSTANTIATE_TEMPLATE(0x00e00003); INSTANTIATE_TEMPLATE(0x00f80400); INSTANTIATE_TEMPLATE(0x01e00000); @@ -233,6 +256,7 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) { INSTANTIATE_TEMPLATE(0xc4400000); INSTANTIATE_TEMPLATE(0xc4c00000); INSTANTIATE_TEMPLATE(0xe0400000); + INSTANTIATE_TEMPLATE(0xe120e000); INSTANTIATE_TEMPLATE(0xe3c00000); INSTANTIATE_TEMPLATE(0xf1200000); #undef INSTANTIATE_TEMPLATE @@ -259,20 +283,44 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) { instantiated = true; \ } INSTANTIATE_TEMPLATE(0x0000001c, 0x00000000); + INSTANTIATE_TEMPLATE(0x00000210, 0x00000000); + INSTANTIATE_TEMPLATE(0x000003c0, 0x00000000); + INSTANTIATE_TEMPLATE(0x00001c00, 0x00000000); + INSTANTIATE_TEMPLATE(0x00001c0f, 0x00000000); INSTANTIATE_TEMPLATE(0x00003000, 0x00000000); INSTANTIATE_TEMPLATE(0x00007800, 0x00000000); + INSTANTIATE_TEMPLATE(0x0000e000, 0x0000a000); INSTANTIATE_TEMPLATE(0x0000f000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00030400, 0x00000000); INSTANTIATE_TEMPLATE(0x0003801f, 0x0000000d); + 
INSTANTIATE_TEMPLATE(0x00060210, 0x00000000); + INSTANTIATE_TEMPLATE(0x00060810, 0x00000000); + INSTANTIATE_TEMPLATE(0x00060a10, 0x00000000); + INSTANTIATE_TEMPLATE(0x00060bf0, 0x00000000); + INSTANTIATE_TEMPLATE(0x00061e10, 0x00000000); + INSTANTIATE_TEMPLATE(0x00061e10, 0x00000400); + INSTANTIATE_TEMPLATE(0x00070200, 0x00000000); + INSTANTIATE_TEMPLATE(0x000b1e10, 0x00000000); INSTANTIATE_TEMPLATE(0x000f0000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00130e1f, 0x00000000); + INSTANTIATE_TEMPLATE(0x00130fff, 0x00000000); + INSTANTIATE_TEMPLATE(0x00180000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00180000, 0x00100000); + INSTANTIATE_TEMPLATE(0x001e0000, 0x00000000); + INSTANTIATE_TEMPLATE(0x001f0000, 0x00000000); INSTANTIATE_TEMPLATE(0x001f0000, 0x001f0000); INSTANTIATE_TEMPLATE(0x0038e000, 0x00000000); INSTANTIATE_TEMPLATE(0x0039e000, 0x00002000); INSTANTIATE_TEMPLATE(0x003ae000, 0x00002000); INSTANTIATE_TEMPLATE(0x003ce000, 0x00042000); + INSTANTIATE_TEMPLATE(0x005f0000, 0x001f0000); INSTANTIATE_TEMPLATE(0x00780000, 0x00000000); + INSTANTIATE_TEMPLATE(0x00870210, 0x00000000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00000000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00800000); INSTANTIATE_TEMPLATE(0x00c00000, 0x00c00000); + INSTANTIATE_TEMPLATE(0x00c00010, 0x00800000); + INSTANTIATE_TEMPLATE(0x00ca1e10, 0x00000000); INSTANTIATE_TEMPLATE(0x01000010, 0x00000000); INSTANTIATE_TEMPLATE(0x20000800, 0x00000000); INSTANTIATE_TEMPLATE(0x20008000, 0x00000000); @@ -312,14 +360,16 @@ BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) { bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // EitherOr optimisation: if there are only one or two patterns in the table, // try to optimise the node to exploit that. - if ((pattern_table_.size() == 2) && (GetSampledBitsCount() > 1)) { + size_t table_size = pattern_table_.size(); + if ((table_size <= 2) && (GetSampledBitsCount() > 1)) { // TODO: support 'x' in this optimisation by dropping the sampled bit // positions before making the mask/value. if ((strchr(pattern_table_[0].pattern, 'x') == NULL) && - (strcmp(pattern_table_[1].pattern, "otherwise") == 0)) { + ((table_size == 1) || + (strcmp(pattern_table_[1].pattern, "otherwise") == 0))) { // A pattern table consisting of a fixed pattern with no x's, and an - // "otherwise" case. Optimise this into an instruction mask and value - // test. + // "otherwise" or absent case. Optimise this into an instruction mask and + // value test. uint32_t single_decode_mask = 0; uint32_t single_decode_value = 0; std::vector<uint8_t> bits = GetSampledBits(); @@ -332,7 +382,6 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { single_decode_value |= 1U << bits[i]; } } - BitExtractFn bit_extract_fn = GetBitExtractFunction(single_decode_mask, single_decode_value); @@ -342,7 +391,9 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { // Set DecodeNode for when the instruction after masking doesn't match the // value. - CompileNodeForBits(decoder, pattern_table_[1].handler, 0); + const char* doesnt_match_handler = + (table_size == 1) ? "VisitUnallocated" : pattern_table_[1].handler; + CompileNodeForBits(decoder, doesnt_match_handler, 0); // Set DecodeNode for when it does match. 
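// [Editor's illustration, not part of the patch.] As a worked example of the
// optimisation above: a node that samples bits {5, 3, 2} and holds the
// single, 'x'-free pattern "110" yields single_decode_mask =
// (1 << 5) | (1 << 3) | (1 << 2) = 0x2c and single_decode_value =
// (1 << 5) | (1 << 3) = 0x28, so the compiled node reduces to the test
// (instr & 0x2c) == 0x28. The non-matching case was routed just above (to the
// "otherwise" handler, or to "VisitUnallocated" for a one-entry table); the
// call below handles the match.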
CompileNodeForBits(decoder, pattern_table_[0].handler, 1); diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h index c0f47c36..38540195 100644 --- a/src/aarch64/decoder-aarch64.h +++ b/src/aarch64/decoder-aarch64.h @@ -38,99 +38,239 @@ // List macro containing all visitors needed by the decoder class. -#define VISITOR_LIST_THAT_RETURN(V) \ - V(AddSubExtended) \ - V(AddSubImmediate) \ - V(AddSubShifted) \ - V(AddSubWithCarry) \ - V(AtomicMemory) \ - V(Bitfield) \ - V(CompareBranch) \ - V(ConditionalBranch) \ - V(ConditionalCompareImmediate) \ - V(ConditionalCompareRegister) \ - V(ConditionalSelect) \ - V(Crypto2RegSHA) \ - V(Crypto3RegSHA) \ - V(CryptoAES) \ - V(DataProcessing1Source) \ - V(DataProcessing2Source) \ - V(DataProcessing3Source) \ - V(Exception) \ - V(Extract) \ - V(EvaluateIntoFlags) \ - V(FPCompare) \ - V(FPConditionalCompare) \ - V(FPConditionalSelect) \ - V(FPDataProcessing1Source) \ - V(FPDataProcessing2Source) \ - V(FPDataProcessing3Source) \ - V(FPFixedPointConvert) \ - V(FPImmediate) \ - V(FPIntegerConvert) \ - V(LoadLiteral) \ - V(LoadStoreExclusive) \ - V(LoadStorePAC) \ - V(LoadStorePairNonTemporal) \ - V(LoadStorePairOffset) \ - V(LoadStorePairPostIndex) \ - V(LoadStorePairPreIndex) \ - V(LoadStorePostIndex) \ - V(LoadStorePreIndex) \ - V(LoadStoreRCpcUnscaledOffset) \ - V(LoadStoreRegisterOffset) \ - V(LoadStoreUnscaledOffset) \ - V(LoadStoreUnsignedOffset) \ - V(LogicalImmediate) \ - V(LogicalShifted) \ - V(MoveWideImmediate) \ - V(NEON2RegMisc) \ - V(NEON2RegMiscFP16) \ - V(NEON3Different) \ - V(NEON3Same) \ - V(NEON3SameExtra) \ - V(NEON3SameFP16) \ - V(NEONAcrossLanes) \ - V(NEONByIndexedElement) \ - V(NEONCopy) \ - V(NEONExtract) \ - V(NEONLoadStoreMultiStruct) \ - V(NEONLoadStoreMultiStructPostIndex) \ - V(NEONLoadStoreSingleStruct) \ - V(NEONLoadStoreSingleStructPostIndex) \ - V(NEONModifiedImmediate) \ - V(NEONPerm) \ - V(NEONScalar2RegMisc) \ - V(NEONScalar2RegMiscFP16) \ - V(NEONScalar3Diff) \ - V(NEONScalar3Same) \ - V(NEONScalar3SameExtra) \ - V(NEONScalar3SameFP16) \ - V(NEONScalarByIndexedElement) \ - V(NEONScalarCopy) \ - V(NEONScalarPairwise) \ - V(NEONScalarShiftImmediate) \ - V(NEONShiftImmediate) \ - V(NEONTable) \ - V(PCRelAddressing) \ - V(RotateRightIntoFlags) \ - V(System) \ - V(TestBranch) \ - V(UnconditionalBranch) \ - V(UnconditionalBranchToRegister) - -// TODO: We shouldn't expose debug-only behaviour like this. Instead, we should -// use release-mode aborts where appropriate, and merge thse into a single -// no-return list. 
-#define VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \ - V(Unallocated) \ +#define VISITOR_LIST_THAT_RETURN(V) \ + V(AddSubExtended) \ + V(AddSubImmediate) \ + V(AddSubShifted) \ + V(AddSubWithCarry) \ + V(AtomicMemory) \ + V(Bitfield) \ + V(CompareBranch) \ + V(ConditionalBranch) \ + V(ConditionalCompareImmediate) \ + V(ConditionalCompareRegister) \ + V(ConditionalSelect) \ + V(Crypto2RegSHA) \ + V(Crypto3RegSHA) \ + V(CryptoAES) \ + V(DataProcessing1Source) \ + V(DataProcessing2Source) \ + V(DataProcessing3Source) \ + V(EvaluateIntoFlags) \ + V(Exception) \ + V(Extract) \ + V(FPCompare) \ + V(FPConditionalCompare) \ + V(FPConditionalSelect) \ + V(FPDataProcessing1Source) \ + V(FPDataProcessing2Source) \ + V(FPDataProcessing3Source) \ + V(FPFixedPointConvert) \ + V(FPImmediate) \ + V(FPIntegerConvert) \ + V(LoadLiteral) \ + V(LoadStoreExclusive) \ + V(LoadStorePAC) \ + V(LoadStorePairNonTemporal) \ + V(LoadStorePairOffset) \ + V(LoadStorePairPostIndex) \ + V(LoadStorePairPreIndex) \ + V(LoadStorePostIndex) \ + V(LoadStorePreIndex) \ + V(LoadStoreRCpcUnscaledOffset) \ + V(LoadStoreRegisterOffset) \ + V(LoadStoreUnscaledOffset) \ + V(LoadStoreUnsignedOffset) \ + V(LogicalImmediate) \ + V(LogicalShifted) \ + V(MoveWideImmediate) \ + V(NEON2RegMisc) \ + V(NEON2RegMiscFP16) \ + V(NEON3Different) \ + V(NEON3Same) \ + V(NEON3SameExtra) \ + V(NEON3SameFP16) \ + V(NEONAcrossLanes) \ + V(NEONByIndexedElement) \ + V(NEONCopy) \ + V(NEONExtract) \ + V(NEONLoadStoreMultiStruct) \ + V(NEONLoadStoreMultiStructPostIndex) \ + V(NEONLoadStoreSingleStruct) \ + V(NEONLoadStoreSingleStructPostIndex) \ + V(NEONModifiedImmediate) \ + V(NEONPerm) \ + V(NEONScalar2RegMisc) \ + V(NEONScalar2RegMiscFP16) \ + V(NEONScalar3Diff) \ + V(NEONScalar3Same) \ + V(NEONScalar3SameExtra) \ + V(NEONScalar3SameFP16) \ + V(NEONScalarByIndexedElement) \ + V(NEONScalarCopy) \ + V(NEONScalarPairwise) \ + V(NEONScalarShiftImmediate) \ + V(NEONShiftImmediate) \ + V(NEONTable) \ + V(PCRelAddressing) \ + V(RotateRightIntoFlags) \ + V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitGatherLoad_VectorPlusImm) \ + V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_VectorPlusImm) \ + V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitScatterStore_VectorPlusImm) \ + V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_VectorPlusImm) \ + V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_VectorPlusImm) \ + V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitScatterStore_VectorPlusImm) \ + V(SVEAddressGeneration) \ + V(SVEBitwiseLogicalUnpredicated) \ + V(SVEBitwiseShiftUnpredicated) \ + V(SVEFFRInitialise) \ + V(SVEFFRWriteFromPredicate) \ + V(SVEFPAccumulatingReduction) \ + V(SVEFPArithmeticUnpredicated) \ + V(SVEFPCompareVectors) \ + V(SVEFPCompareWithZero) \ + V(SVEFPComplexAddition) \ + 
V(SVEFPComplexMulAdd) \ + V(SVEFPComplexMulAddIndex) \ + V(SVEFPFastReduction) \ + V(SVEFPMulIndex) \ + V(SVEFPMulAdd) \ + V(SVEFPMulAddIndex) \ + V(SVEFPUnaryOpUnpredicated) \ + V(SVEIncDecByPredicateCount) \ + V(SVEIndexGeneration) \ + V(SVEIntArithmeticUnpredicated) \ + V(SVEIntCompareSignedImm) \ + V(SVEIntCompareUnsignedImm) \ + V(SVEIntCompareVectors) \ + V(SVEIntMulAddPredicated) \ + V(SVEIntMulAddUnpredicated) \ + V(SVEIntReduction) \ + V(SVEIntUnaryArithmeticPredicated) \ + V(SVEMovprfx) \ + V(SVEMulIndex) \ + V(SVEPermuteVectorExtract) \ + V(SVEPermuteVectorInterleaving) \ + V(SVEPredicateCount) \ + V(SVEPredicateLogical) \ + V(SVEPropagateBreak) \ + V(SVEStackFrameAdjustment) \ + V(SVEStackFrameSize) \ + V(SVEVectorSelect) \ + V(SVEBitwiseLogical_Predicated) \ + V(SVEBitwiseLogicalWithImm_Unpredicated) \ + V(SVEBitwiseShiftByImm_Predicated) \ + V(SVEBitwiseShiftByVector_Predicated) \ + V(SVEBitwiseShiftByWideElements_Predicated) \ + V(SVEBroadcastBitmaskImm) \ + V(SVEBroadcastFPImm_Unpredicated) \ + V(SVEBroadcastGeneralRegister) \ + V(SVEBroadcastIndexElement) \ + V(SVEBroadcastIntImm_Unpredicated) \ + V(SVECompressActiveElements) \ + V(SVEConditionallyBroadcastElementToVector) \ + V(SVEConditionallyExtractElementToSIMDFPScalar) \ + V(SVEConditionallyExtractElementToGeneralRegister) \ + V(SVEConditionallyTerminateScalars) \ + V(SVEConstructivePrefix_Unpredicated) \ + V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ + V(SVEContiguousLoad_ScalarPlusImm) \ + V(SVEContiguousLoad_ScalarPlusScalar) \ + V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ + V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ + V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ + V(SVEContiguousPrefetch_ScalarPlusImm) \ + V(SVEContiguousPrefetch_ScalarPlusScalar) \ + V(SVEContiguousStore_ScalarPlusImm) \ + V(SVEContiguousStore_ScalarPlusScalar) \ + V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ + V(SVECopyFPImm_Predicated) \ + V(SVECopyGeneralRegisterToVector_Predicated) \ + V(SVECopyIntImm_Predicated) \ + V(SVEElementCount) \ + V(SVEExtractElementToSIMDFPScalarRegister) \ + V(SVEExtractElementToGeneralRegister) \ + V(SVEFPArithmetic_Predicated) \ + V(SVEFPArithmeticWithImm_Predicated) \ + V(SVEFPConvertPrecision) \ + V(SVEFPConvertToInt) \ + V(SVEFPExponentialAccelerator) \ + V(SVEFPRoundToIntegralValue) \ + V(SVEFPTrigMulAddCoefficient) \ + V(SVEFPTrigSelectCoefficient) \ + V(SVEFPUnaryOp) \ + V(SVEIncDecRegisterByElementCount) \ + V(SVEIncDecVectorByElementCount) \ + V(SVEInsertSIMDFPScalarRegister) \ + V(SVEInsertGeneralRegister) \ + V(SVEIntAddSubtractImm_Unpredicated) \ + V(SVEIntAddSubtractVectors_Predicated) \ + V(SVEIntCompareScalarCountAndLimit) \ + V(SVEIntConvertToFP) \ + V(SVEIntDivideVectors_Predicated) \ + V(SVEIntMinMaxImm_Unpredicated) \ + V(SVEIntMinMaxDifference_Predicated) \ + V(SVEIntMulImm_Unpredicated) \ + V(SVEIntMulVectors_Predicated) \ + V(SVELoadAndBroadcastElement) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \ + V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \ + V(SVELoadMultipleStructures_ScalarPlusImm) \ + V(SVELoadMultipleStructures_ScalarPlusScalar) \ + V(SVELoadPredicateRegister) \ + V(SVELoadVectorRegister) \ + V(SVEPartitionBreakCondition) \ + V(SVEPermutePredicateElements) \ + V(SVEPredicateFirstActive) \ + V(SVEPredicateInitialize) \ + V(SVEPredicateNextActive) \ + V(SVEPredicateReadFromFFR_Predicated) \ + V(SVEPredicateReadFromFFR_Unpredicated) \ + 
V(SVEPredicateTest) \ + V(SVEPredicateZero) \ + V(SVEPropagateBreakToNextPartition) \ + V(SVEReversePredicateElements) \ + V(SVEReverseVectorElements) \ + V(SVEReverseWithinElements) \ + V(SVESaturatingIncDecRegisterByElementCount) \ + V(SVESaturatingIncDecVectorByElementCount) \ + V(SVEStoreMultipleStructures_ScalarPlusImm) \ + V(SVEStoreMultipleStructures_ScalarPlusScalar) \ + V(SVEStorePredicateRegister) \ + V(SVEStoreVectorRegister) \ + V(SVETableLookup) \ + V(SVEUnpackPredicateElements) \ + V(SVEUnpackVectorElements) \ + V(SVEVectorSplice_Destructive) \ + V(System) \ + V(TestBranch) \ + V(Unallocated) \ + V(UnconditionalBranch) \ + V(UnconditionalBranchToRegister) \ + V(Unimplemented) #define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved) -#define VISITOR_LIST(V) \ - VISITOR_LIST_THAT_RETURN(V) \ - VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(V) \ +#define VISITOR_LIST(V) \ + VISITOR_LIST_THAT_RETURN(V) \ + VISITOR_LIST_THAT_DONT_RETURN(V) namespace vixl { @@ -138,6 +278,12 @@ namespace aarch64 { // The Visitor interface. Disassembler and simulator (and other tools) // must provide implementations for all of these functions. +// +// Note that this class must change in breaking ways with even minor additions +// to VIXL, and so its API should be considered unstable. User classes that +// inherit from this one should be expected to break even on minor version +// updates. If this is a problem, consider using DecoderVisitorWithDefaults +// instead. class DecoderVisitor { public: enum VisitorConstness { kConstVisitor, kNonConstVisitor }; @@ -160,6 +306,25 @@ class DecoderVisitor { const VisitorConstness constness_; }; +// As above, but a default (no-op) implementation for each visitor is provided. +// This is useful for derived classes that only care about specific visitors. +// +// A minor version update may add a visitor, but will never remove one, so it is +// safe (and recommended) to use `override` in derived classes.
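// [Editor's illustration, not part of the patch.] A minimal sketch of how the
// class declared just below might be used: override only the visitors of
// interest and let every other instruction class fall through to the no-op
// default. The counter here is hypothetical, and registration through
// Decoder::AppendVisitor is assumed from VIXL's existing decoder API.
//
//   class BranchCounter : public DecoderVisitorWithDefaults {
//    public:
//     void VisitUnconditionalBranch(const Instruction* instr) VIXL_OVERRIDE {
//       USE(instr);
//       count++;  // Counts direct B/BL instructions only.
//     }
//     int count = 0;
//   };
//
//   BranchCounter counter;
//   decoder.AppendVisitor(&counter);  // Then decode as usual.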
+class DecoderVisitorWithDefaults : public DecoderVisitor { + public: + explicit DecoderVisitorWithDefaults( + VisitorConstness constness = kConstVisitor) + : DecoderVisitor(constness) {} + + virtual ~DecoderVisitorWithDefaults() {} + +#define DECLARE(A) \ + virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE { USE(instr); } + VISITOR_LIST(DECLARE) +#undef DECLARE +}; + class DecodeNode; class CompiledDecodeNode; @@ -257,7 +422,7 @@ class Decoder { }; const int kMaxDecodeSampledBits = 16; -const int kMaxDecodeMappings = 22; +const int kMaxDecodeMappings = 100; typedef void (Decoder::*DecodeFnPtr)(const Instruction*); typedef uint32_t (Instruction::*BitExtractFn)(void) const; diff --git a/src/aarch64/decoder-constants-aarch64.h b/src/aarch64/decoder-constants-aarch64.h index def27fa1..53f283bb 100644 --- a/src/aarch64/decoder-constants-aarch64.h +++ b/src/aarch64/decoder-constants-aarch64.h @@ -39,6 +39,7 @@ static const DecodeMapping kDecodeMapping[] = { { "Root", {28, 27, 26, 25}, { {"0000", "DecodeReserved"}, + {"0010", "DecodeSVE"}, {"100x", "DecodeDataProcessingImmediate"}, {"101x", "DecodeBranchesExceptionAndSystem"}, {"x1x0", "DecodeLoadsAndStores"}, @@ -124,6 +125,720 @@ static const DecodeMapping kDecodeMapping[] = { }, }, + { "DecodeSVE", + {31, 30, 29, 24, 21, 15, 14, 13}, + { {"00000x1x", "VisitSVEIntMulAddPredicated"}, + {"00000000", "DecodeSVE00000000"}, + {"00000001", "DecodeSVE00000001"}, + {"00000100", "DecodeSVE00000100"}, + {"00000101", "VisitSVEIntUnaryArithmeticPredicated"}, + {"00001000", "VisitSVEIntArithmeticUnpredicated"}, + {"00001001", "VisitSVEBitwiseLogicalUnpredicated"}, + {"00001010", "DecodeSVE00001010"}, + {"00001100", "VisitSVEBitwiseShiftUnpredicated"}, + {"00001101", "DecodeSVE00001101"}, + {"00001110", "DecodeSVE00001110"}, + {"00001111", "DecodeSVE00001111"}, + {"000100xx", "DecodeSVE000100xx"}, + {"0001010x", "DecodeSVE0001010x"}, + {"00010110", "DecodeSVE00010110"}, + {"00010111", "DecodeSVE00010111"}, + {"00011000", "VisitSVEPermuteVectorExtract"}, + {"00011001", "DecodeSVE00011001"}, + {"00011010", "DecodeSVE00011010"}, + {"00011011", "VisitSVEPermuteVectorInterleaving"}, + {"00011100", "DecodeSVE00011100"}, + {"00011101", "DecodeSVE00011101"}, + {"0001111x", "VisitSVEVectorSelect"}, + {"00100xxx", "VisitSVEIntCompareVectors"}, + {"00101xxx", "VisitSVEIntCompareUnsignedImm"}, + {"00110x0x", "VisitSVEIntCompareSignedImm"}, + {"0011001x", "DecodeSVE0011001x"}, + {"00110110", "DecodeSVE00110110"}, + {"00110111", "DecodeSVE00110111"}, + {"00111000", "VisitSVEIntCompareScalarCountAndLimit"}, + {"00111001", "UnallocSVEConditionallyTerminateScalars"}, + {"00111100", "DecodeSVE00111100"}, + {"00111101", "UnallocSVEPredicateCount"}, + {"0011111x", "DecodeSVE0011111x"}, + {"010000xx", "VisitSVEIntMulAddUnpredicated"}, + {"01001xxx", "VisitSVEMulIndex"}, + {"011000xx", "VisitSVEFPComplexMulAdd"}, + {"01100100", "UnallocSVEFPComplexAddition"}, + {"01101000", "DecodeSVE01101000"}, + {"01101001", "UnallocSVEFPMulIndex"}, + {"01110x1x", "VisitSVEFPCompareVectors"}, + {"01110000", "VisitSVEFPArithmeticUnpredicated"}, + {"01110001", "DecodeSVE01110001"}, + {"01110100", "DecodeSVE01110100"}, + {"01110101", "DecodeSVE01110101"}, + {"01111xxx", "VisitSVEFPMulAdd"}, + {"100x010x", "UnallocSVELoadAndBroadcastElement"}, + {"100x0110", "DecodeSVE100x0110"}, + {"100x0111", "DecodeSVE100x0111"}, + {"100x11xx", "DecodeSVE100x11xx"}, + {"100000xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"100010xx", "DecodeSVE100010xx"}, + {"100100x1", 
"DecodeSVE100100x1"}, + {"10010000", "DecodeSVE10010000"}, + {"10010010", "DecodeSVE10010010"}, + {"100110x1", "DecodeSVE100110x1"}, + {"10011000", "DecodeSVE10011000"}, + {"10011010", "DecodeSVE10011010"}, + {"101xx000", "VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar"}, + {"101xx001", "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm"}, + {"101xx010", "VisitSVEContiguousLoad_ScalarPlusScalar"}, + {"101xx011", "VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar"}, + {"101xx101", "DecodeSVE101xx101"}, + {"101x0110", "DecodeSVE101x0110"}, + {"101x0111", "DecodeSVE101x0111"}, + {"101x1110", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, + {"101x1111", "DecodeSVE101x1111"}, + {"110x00xx", "VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets"}, + {"110x0111", "DecodeSVE110x0111"}, + {"1100010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"11000110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"110010xx", "DecodeSVE110010xx"}, + {"110011xx", "DecodeSVE110011xx"}, + {"1101010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"11010110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + {"110110xx", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, + {"110111xx", "DecodeSVE110111xx"}, + {"111x0011", "DecodeSVE111x0011"}, + {"111x01x0", "DecodeSVE111x01x0"}, + {"111x0101", "DecodeSVE111x0101"}, + {"111x0111", "DecodeSVE111x0111"}, + {"111x1011", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, + {"111x11x0", "DecodeSVE111x11x0"}, + {"111x1101", "DecodeSVE111x1101"}, + {"111x1111", "DecodeSVE111x1111"}, + {"1110x010", "VisitSVEContiguousStore_ScalarPlusScalar"}, + {"1111x000", "UnallocSVEStorePredicateRegister"}, + {"1111x010", "DecodeSVE1111x010"}, + }, + }, + + { "DecodeSVE00000000", + {20, 19, 18}, + { {"00x", "VisitSVEIntAddSubtractVectors_Predicated"}, + {"01x", "VisitSVEIntMinMaxDifference_Predicated"}, + {"100", "VisitSVEIntMulVectors_Predicated"}, + {"101", "VisitSVEIntDivideVectors_Predicated"}, + {"11x", "VisitSVEBitwiseLogical_Predicated"}, + }, + }, + + { "DecodeSVE00000100", + {20, 19}, + { {"0x", "VisitSVEBitwiseShiftByImm_Predicated"}, + {"10", "VisitSVEBitwiseShiftByVector_Predicated"}, + {"11", "VisitSVEBitwiseShiftByWideElements_Predicated"}, + }, + }, + + { "DecodeSVE00001010", + {23, 12, 11}, + { {"x0x", "VisitSVEIndexGeneration"}, + {"010", "VisitSVEStackFrameAdjustment"}, + {"110", "UnallocSVEStackFrameSize"}, + }, + }, + + { "UnallocSVEStackFrameSize", + {22, 20, 19, 18, 17, 16}, + { {"011111", "VisitSVEStackFrameSize"}, + }, + }, + + { "DecodeSVE00001101", + {12, 11, 10}, + { {"0xx", "VisitSVEAddressGeneration"}, + {"10x", "VisitSVEFPTrigSelectCoefficient"}, + {"110", "VisitSVEFPExponentialAccelerator"}, + {"111", "VisitSVEConstructivePrefix_Unpredicated"}, + }, + }, + + { "DecodeSVE00001110", + {20, 12, 11}, + { {"00x", "VisitSVESaturatingIncDecVectorByElementCount"}, + {"100", "VisitSVEIncDecVectorByElementCount"}, + }, + }, + + { "DecodeSVE00001111", + {20, 12, 11}, + { {"x1x", "VisitSVESaturatingIncDecRegisterByElementCount"}, + {"000", "VisitSVEElementCount"}, + {"100", "VisitSVEIncDecRegisterByElementCount"}, + }, + }, + + { "DecodeSVE000100xx", + {23, 22, 20, 19, 18}, + { {"xx1xx", "VisitSVECopyIntImm_Predicated"}, + {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "DecodeSVE0001010x", + {23, 22, 20, 19, 18}, + { {"0x000", 
"VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "DecodeSVE00010110", + {23, 22, 20, 19, 18}, + { {"xx1xx", "VisitSVECopyFPImm_Predicated"}, + {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "DecodeSVE00010111", + {23, 22, 20, 19, 18}, + { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"}, + {"11000", "VisitSVEBroadcastBitmaskImm"}, + }, + }, + + { "UnallocSVEBroadcastIndexElement", + {10}, + { {"0", "VisitSVEBroadcastIndexElement"}, + }, + }, + + { "UnallocSVETableLookup", + {10}, + { {"0", "VisitSVETableLookup"}, + }, + }, + + { "UnallocSVEBroadcastGeneralRegister", + {17, 16, 10}, + { {"000", "VisitSVEBroadcastGeneralRegister"}, + }, + }, + + { "UnallocSVEInsertGeneralRegister", + {17, 16, 10}, + { {"000", "VisitSVEInsertGeneralRegister"}, + }, + }, + + { "UnallocSVEUnpackVectorElements", + {10}, + { {"0", "VisitSVEUnpackVectorElements"}, + }, + }, + + { "UnallocSVEInsertSIMDFPScalarRegister", + {17, 16, 10}, + { {"000", "VisitSVEInsertSIMDFPScalarRegister"}, + }, + }, + + { "UnallocSVEReverseVectorElements", + {17, 16, 10}, + { {"000", "VisitSVEReverseVectorElements"}, + }, + }, + + { "DecodeSVE00011001", + {20, 19, 18, 12, 11}, + { {"xxx00", "UnallocSVEBroadcastIndexElement"}, + {"xxx10", "UnallocSVETableLookup"}, + {"00011", "UnallocSVEBroadcastGeneralRegister"}, + {"00111", "UnallocSVEInsertGeneralRegister"}, + {"10011", "UnallocSVEUnpackVectorElements"}, + {"10111", "UnallocSVEInsertSIMDFPScalarRegister"}, + {"11011", "UnallocSVEReverseVectorElements"}, + }, + }, + + { "UnallocSVEPermutePredicateElements", + {9, 4}, + { {"00", "VisitSVEPermutePredicateElements"}, + }, + }, + + { "UnallocSVEUnpackPredicateElements", + {23, 22, 19, 17, 12, 11, 10, 9, 4}, + { {"000000000", "VisitSVEUnpackPredicateElements"}, + }, + }, + + { "UnallocSVEReversePredicateElements", + {19, 17, 16, 12, 11, 10, 9, 4}, + { {"00000000", "VisitSVEReversePredicateElements"}, + }, + }, + + { "DecodeSVE00011010", + {20, 18}, + { {"0x", "UnallocSVEPermutePredicateElements"}, + {"10", "UnallocSVEUnpackPredicateElements"}, + {"11", "UnallocSVEReversePredicateElements"}, + }, + }, + + { "DecodeSVE00011100", + {23, 20, 19, 18, 17, 16}, + { {"x00000", "VisitSVECopySIMDFPScalarRegisterToVector_Predicated"}, + {"x0001x", "VisitSVEExtractElementToSIMDFPScalarRegister"}, + {"x001xx", "VisitSVEReverseWithinElements"}, + {"x0100x", "VisitSVEConditionallyBroadcastElementToVector"}, + {"x0101x", "VisitSVEConditionallyExtractElementToSIMDFPScalar"}, + {"x01100", "VisitSVEVectorSplice_Destructive"}, + {"100001", "VisitSVECompressActiveElements"}, + }, + }, + + { "DecodeSVE00011101", + {20, 19, 18, 17, 16}, + { {"0000x", "VisitSVEExtractElementToGeneralRegister"}, + {"01000", "VisitSVECopyGeneralRegisterToVector_Predicated"}, + {"1000x", "VisitSVEConditionallyExtractElementToGeneralRegister"}, + }, + }, + + { "UnallocSVEPartitionBreakCondition", + {18, 17, 16, 9}, + { {"0000", "VisitSVEPartitionBreakCondition"}, + }, + }, + + { "UnallocSVEPropagateBreakToNextPartition", + {23, 18, 17, 16, 9, 4}, + { {"000000", "VisitSVEPropagateBreakToNextPartition"}, + }, + }, + + { "DecodeSVE0011001x", + {20, 19}, + { {"0x", "VisitSVEPredicateLogical"}, + {"10", "UnallocSVEPartitionBreakCondition"}, + {"11", 
"UnallocSVEPropagateBreakToNextPartition"}, + }, + }, + + { "UnallocSVEPredicateTest", + {18, 17, 9, 4}, + { {"0000", "VisitSVEPredicateTest"}, + }, + }, + + { "UnallocSVEPredicateFirstActive", + {18, 17, 12, 11, 10, 9, 4}, + { {"0000000", "VisitSVEPredicateFirstActive"}, + }, + }, + + { "UnallocSVEPredicateNextActive", + {18, 17, 12, 11, 10, 9, 4}, + { {"0000100", "VisitSVEPredicateNextActive"}, + }, + }, + + { "DecodeSVE00110110", + {20, 19, 16}, + { {"0xx", "VisitSVEPropagateBreak"}, + {"100", "UnallocSVEPredicateTest"}, + {"110", "UnallocSVEPredicateFirstActive"}, + {"111", "UnallocSVEPredicateNextActive"}, + }, + }, + + { "UnallocSVEPredicateTest", + {18, 17, 9, 4}, + { {"0000", "VisitSVEPredicateTest"}, + }, + }, + + { "UnallocSVEPredicateInitialize", + {18, 17, 11, 4}, + { {"0000", "VisitSVEPredicateInitialize"}, + }, + }, + + { "UnallocSVEPredicateZero", + {18, 17, 11, 9, 8, 7, 6, 5, 4}, + { {"000000000", "VisitSVEPredicateZero"}, + }, + }, + + { "UnallocSVEPredicateReadFromFFR_Predicated", + {18, 17, 11, 9, 4}, + { {"00000", "VisitSVEPredicateReadFromFFR_Predicated"}, + }, + }, + + { "UnallocSVEPredicateReadFromFFR_Unpredicated", + {18, 17, 11, 9, 8, 7, 6, 5, 4}, + { {"000000000", "VisitSVEPredicateReadFromFFR_Unpredicated"}, + }, + }, + + { "DecodeSVE00110111", + {20, 19, 16, 12, 10}, + { {"0xxxx", "VisitSVEPropagateBreak"}, + {"100xx", "UnallocSVEPredicateTest"}, + {"11x00", "UnallocSVEPredicateInitialize"}, + {"11001", "UnallocSVEPredicateZero"}, + {"11010", "UnallocSVEPredicateReadFromFFR_Predicated"}, + {"11110", "UnallocSVEPredicateReadFromFFR_Unpredicated"}, + }, + }, + + { "UnallocSVEConditionallyTerminateScalars", + {12, 11, 10, 3, 2, 1, 0}, + { {"0000000", "VisitSVEConditionallyTerminateScalars"}, + }, + }, + + { "UnallocSVEPredicateCount_2", + {20}, + { {"0", "VisitSVEPredicateCount"}, + }, + }, + + { "UnallocSVEIncDecByPredicateCount", + {20}, + { {"0", "VisitSVEIncDecByPredicateCount"}, + }, + }, + + { "UnallocSVEFFRWriteFromPredicate", + {20, 17, 16, 11, 10, 9, 4, 3, 2, 1, 0}, + { {"00000000000", "VisitSVEFFRWriteFromPredicate"}, + }, + }, + + { "UnallocSVEFFRInitialise", + {20, 17, 16, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, + { {"000000000000000", "VisitSVEFFRInitialise"}, + }, + }, + + { "DecodeSVE00111100", + {19, 18, 12}, + { {"0xx", "UnallocSVEPredicateCount_2"}, + {"1x0", "UnallocSVEIncDecByPredicateCount"}, + {"101", "UnallocSVEFFRWriteFromPredicate"}, + {"111", "UnallocSVEFFRInitialise"}, + }, + }, + + { "UnallocSVEPredicateCount", + {20, 19}, + { {"00", "VisitSVEPredicateCount"}, + }, + }, + + { "DecodeSVE0011111x", + {20, 19, 16}, + { {"00x", "VisitSVEIntAddSubtractImm_Unpredicated"}, + {"01x", "VisitSVEIntMinMaxImm_Unpredicated"}, + {"10x", "VisitSVEIntMulImm_Unpredicated"}, + {"110", "VisitSVEBroadcastIntImm_Unpredicated"}, + {"111", "VisitSVEBroadcastFPImm_Unpredicated"}, + }, + }, + + { "UnallocSVEFPComplexAddition", + {20, 19, 18, 17}, + { {"0000", "VisitSVEFPComplexAddition"}, + }, + }, + + { "DecodeSVE01101000", + {12, 11}, + { {"00", "VisitSVEFPMulAddIndex"}, + {"1x", "VisitSVEFPComplexMulAddIndex"}, + }, + }, + + { "UnallocSVEFPMulIndex", + {12, 11, 10}, + { {"000", "VisitSVEFPMulIndex"}, + }, + }, + + { "DecodeSVE01110001", + {20, 19, 12}, + { {"00x", "VisitSVEFPFastReduction"}, + {"011", "VisitSVEFPUnaryOpUnpredicated"}, + {"10x", "VisitSVEFPCompareWithZero"}, + {"11x", "VisitSVEFPAccumulatingReduction"}, + }, + }, + + { "UnallocSVEFPTrigMulAddCoefficient", + {12, 11, 10}, + { {"000", "VisitSVEFPTrigMulAddCoefficient"}, + }, + }, + + { 
"UnallocSVEFPArithmeticWithImm_Predicated", + {9, 8, 7, 6}, + { {"0000", "VisitSVEFPArithmeticWithImm_Predicated"}, + }, + }, + + { "DecodeSVE01110100", + {20, 19}, + { {"0x", "VisitSVEFPArithmetic_Predicated"}, + {"10", "UnallocSVEFPTrigMulAddCoefficient"}, + {"11", "UnallocSVEFPArithmeticWithImm_Predicated"}, + }, + }, + + { "DecodeSVE01110101", + {20, 19, 18}, + { {"00x", "VisitSVEFPRoundToIntegralValue"}, + {"010", "VisitSVEFPConvertPrecision"}, + {"011", "VisitSVEFPUnaryOp"}, + {"10x", "VisitSVEIntConvertToFP"}, + {"11x", "VisitSVEFPConvertToInt"}, + }, + }, + + { "UnallocSVELoadAndBroadcastElement", + {22}, + { {"1", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100x0110", + {22, 4}, + { {"00", "VisitSVEContiguousPrefetch_ScalarPlusScalar"}, + {"1x", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100x0111", + {22, 4}, + { {"00", "VisitSVE32BitGatherPrefetch_VectorPlusImm"}, + {"1x", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100x11xx", + {22}, + { {"0", "VisitSVE32BitGatherLoad_VectorPlusImm"}, + {"1", "VisitSVELoadAndBroadcastElement"}, + }, + }, + + { "DecodeSVE100010xx", + {23, 4}, + { {"00", "VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets"}, + {"1x", "VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets"}, + }, + }, + + { "DecodeSVE100100x1", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10010000", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"100", "VisitSVELoadPredicateRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10010010", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"}, + {"10x", "VisitSVELoadVectorRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE100110x1", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10011000", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, + {"100", "VisitSVELoadPredicateRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE10011010", + {23, 22, 4}, + { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"}, + {"10x", "VisitSVELoadVectorRegister"}, + {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"}, + }, + }, + + { "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm", + {20}, + { {"0", "VisitSVELoadAndBroadcastQuadword_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE101xx101", + {20}, + { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, + {"1", "VisitSVEContiguousNonFaultLoad_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE101x0110", + {22}, + { {"0", "VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar"}, + {"1", "VisitSVELoadMultipleStructures_ScalarPlusScalar"}, + }, + }, + + { "DecodeSVE101x0111", + {22, 20}, + { {"00", "VisitSVEContiguousNonTemporalLoad_ScalarPlusImm"}, + {"10", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE101x1111", + {22, 20}, + { {"x0", "VisitSVELoadMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE110x0111", + {22, 4}, + { {"00", "VisitSVE64BitGatherPrefetch_VectorPlusImm"}, + {"1x", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { 
{"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "DecodeSVE110010xx", + {23, 4}, + { {"00", "VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets"}, + {"1x", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"}, + }, + }, + + { "DecodeSVE110011xx", + {23, 22, 4}, + { {"x0x", "VisitSVE64BitGatherLoad_VectorPlusImm"}, + {"010", "VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets"}, + {"11x", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets", + {22}, + { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"}, + }, + }, + + { "DecodeSVE110111xx", + {22}, + { {"0", "VisitSVE64BitGatherLoad_VectorPlusImm"}, + {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"}, + }, + }, + + { "DecodeSVE111x0011", + {22}, + { {"0", "VisitSVEContiguousNonTemporalStore_ScalarPlusScalar"}, + {"1", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"}, + }, + }, + + { "DecodeSVE111x01x0", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets"}, + {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets"}, + }, + }, + + { "DecodeSVE111x0101", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets"}, + {"1", "VisitSVE64BitScatterStore_VectorPlusImm"}, + }, + }, + + { "DecodeSVE111x0111", + {22, 20}, + { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, + {"01", "VisitSVEContiguousNonTemporalStore_ScalarPlusImm"}, + {"11", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "DecodeSVE111x11x0", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets"}, + {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets"}, + }, + }, + + { "DecodeSVE111x1101", + {22}, + { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets"}, + {"1", "VisitSVE32BitScatterStore_VectorPlusImm"}, + }, + }, + + { "DecodeSVE111x1111", + {22, 20}, + { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"}, + {"x1", "VisitSVEStoreMultipleStructures_ScalarPlusImm"}, + }, + }, + + { "UnallocSVEStorePredicateRegister", + {23, 22, 4}, + { {"100", "VisitSVEStorePredicateRegister"}, + }, + }, + + { "DecodeSVE1111x010", + {23, 22}, + { {"0x", "VisitSVEContiguousStore_ScalarPlusScalar"}, + {"10", "VisitSVEStoreVectorRegister"}, + {"11", "VisitSVEContiguousStore_ScalarPlusScalar"}, + }, + }, + { "DecodeNEONScalarAnd3SHA", {29, 23, 22, 15, 14, 11, 10}, { {"0xx0x00", "VisitCrypto3RegSHA"}, @@ -1388,6 +2103,28 @@ static const DecodeMapping kDecodeMapping[] = { {"otherwise", "VisitUnconditionalBranchToRegister"}, }, }, + + { "DecodeSVE101xxxxx", + {15, 14, 13}, + { {"101", "DecodeSVE101xx101"}, + {"010", "VisitSVEContiguousLoad_ScalarPlusScalar"}, + {"otherwise", "VisitSVEMemContiguousLoad"}, + }, + }, + + { "DecodeSVE101xx101", + {20}, + { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"}, + {"1", "VisitSVEMemContiguousLoad"}, + }, + }, + + { "DecodeSVE00000001", + {20, 19}, + { {"10", "VisitSVEMovprfx"}, + {"otherwise", "VisitSVEIntReduction"}, + }, + }, }; // clang-format on diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index 7d6fa148..d8ac2d24 100644 --- 
a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -24,6 +24,7 @@ // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include <bitset> #include <cstdlib> #include <sstream> @@ -956,7 +957,7 @@ void Disassembler::VisitTestBranch(const Instruction *instr) { // disassembled as Wt, otherwise Xt. As the top bit of the immediate is // encoded in bit 31 of the instruction, we can reuse the Rt form, which // uses bit 31 (normally "sf") to choose the register size. - const char *form = "'Rt, 'IS, 'TImmTest"; + const char *form = "'Rt, 'It, 'TImmTest"; switch (instr->Mask(TestBranchMask)) { case TBZ: @@ -1086,7 +1087,7 @@ void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) { #undef LS_UNSIGNEDOFFSET case PRFM_unsigned: mnemonic = "prfm"; - form = "'PrefOp, ['Xns'ILU]"; + form = "'prefOp, ['Xns'ILU]"; } Format(instr, mnemonic, form); } @@ -1165,7 +1166,7 @@ void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) { #undef LS_REGISTEROFFSET case PRFM_reg: mnemonic = "prfm"; - form = "'PrefOp, ['Xns, 'Offsetreg]"; + form = "'prefOp, ['Xns, 'Offsetreg]"; } Format(instr, mnemonic, form); } @@ -1180,7 +1181,7 @@ void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction *instr) { const char *form_s = "'St, ['Xns'ILS]"; const char *form_d = "'Dt, ['Xns'ILS]"; const char *form_q = "'Qt, ['Xns'ILS]"; - const char *form_prefetch = "'PrefOp, ['Xns'ILS]"; + const char *form_prefetch = "'prefOp, ['Xns'ILS]"; switch (instr->Mask(LoadStoreUnscaledOffsetMask)) { case STURB_w: @@ -1303,7 +1304,7 @@ void Disassembler::VisitLoadLiteral(const Instruction *instr) { } case PRFM_lit: { mnemonic = "prfm"; - form = "'PrefOp, 'ILLiteral 'LValue"; + form = "'prefOp, 'ILLiteral 'LValue"; break; } default: @@ -1486,14 +1487,14 @@ void Disassembler::VisitLoadStorePairNonTemporal(const Instruction *instr) { V(CASAH, "casah", "'Ws, 'Wt") \ V(CASLH, "caslh", "'Ws, 'Wt") \ V(CASALH, "casalh", "'Ws, 'Wt") \ - V(CASP_w, "casp", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASP_x, "casp", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ - V(CASPA_w, "caspa", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASPA_x, "caspa", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ - V(CASPL_w, "caspl", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASPL_x, "caspl", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") \ - V(CASPAL_w, "caspal", "'Ws, 'W(s+1), 'Wt, 'W(t+1)") \ - V(CASPAL_x, "caspal", "'Xs, 'X(s+1), 'Xt, 'X(t+1)") + V(CASP_w, "casp", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASP_x, "casp", "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPA_w, "caspa", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPA_x, "caspa", "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPL_w, "caspl", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPL_x, "caspl", "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPAL_w, "caspal", "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPAL_x, "caspal", "'Xs, 'Xs+, 'Xt, 'Xt+") // clang-format on @@ -1898,15 +1899,15 @@ void Disassembler::VisitFPImmediate(const Instruction *instr) { switch (instr->Mask(FPImmediateMask)) { case FMOV_h_imm: mnemonic = "fmov"; - form = "'Hd, 'IFPHalf"; + form = "'Hd, 'IFP"; break; case FMOV_s_imm: mnemonic = "fmov"; - form = "'Sd, 'IFPSingle"; + form = "'Sd, 'IFP"; break; case FMOV_d_imm: mnemonic = "fmov"; - form = "'Dd, 'IFPDouble"; + form = "'Dd, 'IFP"; break; default: VIXL_UNREACHABLE(); @@ -3409,7 +3410,7 @@ void Disassembler::VisitNEONCopy(const Instruction *instr) { } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) { mnemonic = "smov"; nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); - form = 
"'Rdq, 'Vn.%s['IVInsIndex1]"; + form = "'R30d, 'Vn.%s['IVInsIndex1]"; } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) { mnemonic = "dup"; form = "'Vd.%s, 'Vn.%s['IVInsIndex1]"; @@ -4006,19 +4007,16 @@ void Disassembler::VisitNEONModifiedImmediate(const Instruction *instr) { } } else { // cmode<0> == '1' mnemonic = "fmov"; + form = "'Vt.%s, 'IFPNeon"; if (half_enc == 1) { - form = "'Vt.%s, 'IVMIImmFPHalf"; nfd.SetFormatMap(0, &map_h); } else if (op == 0) { - form = "'Vt.%s, 'IVMIImmFPSingle"; nfd.SetFormatMap(0, &map_s); + } else if (q == 1) { + form = "'Vt.2d, 'IFPNeon"; } else { - if (q == 1) { - form = "'Vt.2d, 'IVMIImmFPDouble"; - } else { - mnemonic = "unallocated"; - form = "(NEONModifiedImmediate)"; - } + mnemonic = "unallocated"; + form = "(NEONModifiedImmediate)"; } } } @@ -4926,6 +4924,4582 @@ void Disassembler::VisitNEONPerm(const Instruction *instr) { Format(instr, mnemonic, nfd.Substitute(form)); } +void Disassembler:: + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]"; + + switch (instr->Mask( + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1H_z_p_bz_s_x32_scaled: + mnemonic = "ld1h"; + break; + case LD1SH_z_p_bz_s_x32_scaled: + mnemonic = "ld1sh"; + break; + case LDFF1H_z_p_bz_s_x32_scaled: + mnemonic = "ldff1h"; + break; + case LDFF1SH_z_p_bz_s_x32_scaled: + mnemonic = "ldff1sh"; + break; + default: + form = "(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]"; + + switch ( + instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1W_z_p_bz_s_x32_scaled: + mnemonic = "ld1w"; + break; + case LDFF1W_z_p_bz_s_x32_scaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( + const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]"; + + const char *mnemonic = "unimplemented"; + switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_s_x32_unscaled: + mnemonic = "ld1b"; + break; + case LD1H_z_p_bz_s_x32_unscaled: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bz_s_x32_unscaled: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bz_s_x32_unscaled: + mnemonic = "ld1sh"; + break; + case LD1W_z_p_bz_s_x32_unscaled: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1b"; + break; + case LDFF1H_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1sh"; + break; + case LDFF1W_z_p_bz_s_x32_unscaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherLoad_VectorPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s]"; + const char *form_imm_b = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]"; + const char 
*form_imm_h = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*2]"; + const char *form_imm_w = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*4]"; + const char *form_imm; + + const char *mnemonic = "unimplemented"; + switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_s: + mnemonic = "ld1b"; + form_imm = form_imm_b; + break; + case LD1H_z_p_ai_s: + mnemonic = "ld1h"; + form_imm = form_imm_h; + break; + case LD1SB_z_p_ai_s: + mnemonic = "ld1sb"; + form_imm = form_imm_b; + break; + case LD1SH_z_p_ai_s: + mnemonic = "ld1sh"; + form_imm = form_imm_h; + break; + case LD1W_z_p_ai_s: + mnemonic = "ld1w"; + form_imm = form_imm_w; + break; + case LDFF1B_z_p_ai_s: + mnemonic = "ldff1b"; + form_imm = form_imm_b; + break; + case LDFF1H_z_p_ai_s: + mnemonic = "ldff1h"; + form_imm = form_imm_h; + break; + case LDFF1SB_z_p_ai_s: + mnemonic = "ldff1sb"; + form_imm = form_imm_b; + break; + case LDFF1SH_z_p_ai_s: + mnemonic = "ldff1sh"; + form_imm = form_imm_h; + break; + case LDFF1W_z_p_ai_s: + mnemonic = "ldff1w"; + form_imm = form_imm_w; + break; + default: + form = "(SVE32BitGatherLoad_VectorPlusImm)"; + form_imm = form; + break; + } + if (instr->ExtractBits(20, 16) != 0) form = form_imm; + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.s, '?22:suxtw"; + const char *suffix = NULL; + + switch ( + instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) { + case PRFB_i_p_bz_s_x32_scaled: + mnemonic = "prfb"; + suffix = "]"; + break; + case PRFD_i_p_bz_s_x32_scaled: + mnemonic = "prfd"; + suffix = " #3]"; + break; + case PRFH_i_p_bz_s_x32_scaled: + mnemonic = "prfh"; + suffix = " #1]"; + break; + case PRFW_i_p_bz_s_x32_scaled: + mnemonic = "prfw"; + suffix = " #2]"; + break; + default: + form = "(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBits(20, 16) != 0) + ? 
"'prefSVEOp, 'Pgl, ['Zn.s, #'u2016]" + : "'prefSVEOp, 'Pgl, ['Zn.s]"; + + switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) { + case PRFB_i_p_ai_s: + mnemonic = "prfb"; + break; + case PRFD_i_p_ai_s: + mnemonic = "prfd"; + break; + case PRFH_i_p_ai_s: + mnemonic = "prfh"; + break; + case PRFW_i_p_ai_s: + mnemonic = "prfw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]"; + + switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) { + case ST1H_z_p_bz_s_x32_scaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_s_x32_scaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]"; + + switch ( + instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_s_x32_unscaled: + mnemonic = "st1b"; + break; + case ST1H_z_p_bz_s_x32_unscaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_s_x32_unscaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl, ['Zn.s"; + const char *suffix = NULL; + + bool is_zero = instr->ExtractBits(20, 16) == 0; + + switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_s: + mnemonic = "st1b"; + suffix = is_zero ? "]" : ", #'u2016]"; + break; + case ST1H_z_p_ai_s: + mnemonic = "st1h"; + suffix = is_zero ? "]" : ", #'u2016*2]"; + break; + case ST1W_z_p_ai_s: + mnemonic = "st1w"; + suffix = is_zero ? 
"]" : ", #'u2016*4]"; + break; + default: + form = "(SVE32BitScatterStore_VectorPlusImm)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw #'u2423]"; + + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { + case LD1D_z_p_bz_d_x32_scaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_x32_scaled: + mnemonic = "ld1h"; + break; + case LD1SH_z_p_bz_d_x32_scaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_x32_scaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_x32_scaled: + mnemonic = "ld1w"; + break; + case LDFF1D_z_p_bz_d_x32_scaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_x32_scaled: + mnemonic = "ldff1h"; + break; + case LDFF1SH_z_p_bz_d_x32_scaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_x32_scaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_x32_scaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]"; + + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { + case LD1D_z_p_bz_d_64_scaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_64_scaled: + mnemonic = "ld1h"; + break; + case LD1SH_z_p_bz_d_64_scaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_64_scaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_64_scaled: + mnemonic = "ld1w"; + break; + case LDFF1D_z_p_bz_d_64_scaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_64_scaled: + mnemonic = "ldff1h"; + break; + case LDFF1SH_z_p_bz_d_64_scaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_64_scaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_64_scaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]"; + + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_64_unscaled: + mnemonic = "ld1b"; + break; + case LD1D_z_p_bz_d_64_unscaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_64_unscaled: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bz_d_64_unscaled: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bz_d_64_unscaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_64_unscaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_64_unscaled: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_bz_d_64_unscaled: + mnemonic = "ldff1b"; + break; + case LDFF1D_z_p_bz_d_64_unscaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_64_unscaled: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_bz_d_64_unscaled: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_bz_d_64_unscaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_64_unscaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_64_unscaled: + mnemonic = "ldff1w"; + break; 
+ default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]"; + + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_x32_unscaled: + mnemonic = "ld1b"; + break; + case LD1D_z_p_bz_d_x32_unscaled: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bz_d_x32_unscaled: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bz_d_x32_unscaled: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bz_d_x32_unscaled: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bz_d_x32_unscaled: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bz_d_x32_unscaled: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1b"; + break; + case LDFF1D_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_bz_d_x32_unscaled: + mnemonic = "ldff1w"; + break; + default: + form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d]"; + const char *form_imm[4] = {"{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*2]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*4]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*8]"}; + + if (instr->ExtractBits(20, 16) != 0) { + unsigned msz = instr->ExtractBits(24, 23); + bool sign_extend = instr->ExtractBit(14) == 0; + if ((msz == kDRegSizeInBytesLog2) && sign_extend) { + form = "(SVE64BitGatherLoad_VectorPlusImm)"; + } else { + VIXL_ASSERT(msz < ArrayLength(form_imm)); + form = form_imm[msz]; + } + } + + const char *mnemonic = "unimplemented"; + switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_d: + mnemonic = "ld1b"; + break; + case LD1D_z_p_ai_d: + mnemonic = "ld1d"; + break; + case LD1H_z_p_ai_d: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_ai_d: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_ai_d: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_ai_d: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_ai_d: + mnemonic = "ld1w"; + break; + case LDFF1B_z_p_ai_d: + mnemonic = "ldff1b"; + break; + case LDFF1D_z_p_ai_d: + mnemonic = "ldff1d"; + break; + case LDFF1H_z_p_ai_d: + mnemonic = "ldff1h"; + break; + case LDFF1SB_z_p_ai_d: + mnemonic = "ldff1sb"; + break; + case LDFF1SH_z_p_ai_d: + mnemonic = "ldff1sh"; + break; + case LDFF1SW_z_p_ai_d: + mnemonic = "ldff1sw"; + break; + case LDFF1W_z_p_ai_d: + mnemonic = "ldff1w"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)"; + + switch ( + instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) { + case PRFB_i_p_bz_d_64_scaled: + mnemonic = "prfb"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d]"; + break; + case PRFD_i_p_bz_d_64_scaled: + mnemonic = "prfd"; + form = "'prefSVEOp, 
'Pgl, ['Xns, 'Zm.d, lsl #3]"; + break; + case PRFH_i_p_bz_d_64_scaled: + mnemonic = "prfh"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #1]"; + break; + case PRFW_i_p_bz_d_64_scaled: + mnemonic = "prfw"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw"; + const char *suffix = NULL; + + switch (instr->Mask( + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + case PRFB_i_p_bz_d_x32_scaled: + mnemonic = "prfb"; + suffix = " ]"; + break; + case PRFD_i_p_bz_d_x32_scaled: + mnemonic = "prfd"; + suffix = " #3]"; + break; + case PRFH_i_p_bz_d_x32_scaled: + mnemonic = "prfh"; + suffix = " #1]"; + break; + case PRFW_i_p_bz_d_x32_scaled: + mnemonic = "prfw"; + suffix = " #2]"; + break; + default: + form = "(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE64BitGatherPrefetch_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBits(20, 16) != 0) + ? "'prefSVEOp, 'Pgl, ['Zn.d, #'u2016]" + : "'prefSVEOp, 'Pgl, ['Zn.d]"; + + switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) { + case PRFB_i_p_ai_d: + mnemonic = "prfb"; + break; + case PRFD_i_p_ai_d: + mnemonic = "prfd"; + break; + case PRFH_i_p_ai_d: + mnemonic = "prfh"; + break; + case PRFW_i_p_ai_d: + mnemonic = "prfw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]"; + + switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_64_scaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_64_scaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_64_scaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]"; + + switch ( + instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_64_unscaled: + mnemonic = "st1b"; + break; + case ST1D_z_p_bz_d_64_unscaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_64_unscaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_64_unscaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffset)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]"; + + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_x32_scaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_x32_scaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_x32_scaled: + mnemonic = "st1w"; + break; + 
default: + form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]"; + + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_x32_unscaled: + mnemonic = "st1b"; + break; + case ST1D_z_p_bz_d_x32_unscaled: + mnemonic = "st1d"; + break; + case ST1H_z_p_bz_d_x32_unscaled: + mnemonic = "st1h"; + break; + case ST1W_z_p_bz_d_x32_unscaled: + mnemonic = "st1w"; + break; + default: + form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.d}, 'Pgl, ['Zn.d"; + const char *suffix = NULL; + + bool is_zero = instr->ExtractBits(20, 16) == 0; + + switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_d: + mnemonic = "st1b"; + suffix = is_zero ? "]" : ", #'u2016]"; + break; + case ST1D_z_p_ai_d: + mnemonic = "st1d"; + suffix = is_zero ? "]" : ", #'u2016*8]"; + break; + case ST1H_z_p_ai_d: + mnemonic = "st1h"; + suffix = is_zero ? "]" : ", #'u2016*2]"; + break; + case ST1W_z_p_ai_d: + mnemonic = "st1w"; + suffix = is_zero ? "]" : ", #'u2016*4]"; + break; + default: + form = "(SVE64BitScatterStore_VectorPlusImm)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEBitwiseLogicalWithImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'tl, 'Zd.'tl, 'ITriSvel"; + + if (instr->GetSVEImmLogical() == 0) { + // The immediate encoded in the instruction is not in the expected format. 
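// Annotation, not part of this commit: "expected format" means the A64/SVE
// bitmask-immediate encoding, which can only represent a value built by
// replicating a 2-, 4-, 8-, 16-, 32- or 64-bit element across the register,
// where the element is a rotated run of contiguous set bits. A sketch of the
// replication half of that test:
//
//   bool IsReplicatedPattern(uint64_t value, unsigned element_bits) {
//     // Rotate right by one element; periodic values are unchanged.
//     // Valid for element_bits in {2, 4, 8, 16, 32}.
//     uint64_t rot = (value >> element_bits) | (value << (64 - element_bits));
//     return rot == value;
//   }
//
// 0x0f0f0f0f0f0f0f0f repeats every 8 bits and each element is a run of set
// bits, so it encodes; 0x0123456789abcdef is encodable at no element size,
// and an instruction carrying an invalid field combination decodes to 0 here.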
+ Format(instr, "unallocated", "(SVEBitwiseImm)"); + return; + } + + switch (instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { + case AND_z_zi: + mnemonic = "and"; + break; + case EOR_z_zi: + mnemonic = "eor"; + break; + case ORR_z_zi: + mnemonic = "orr"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseLogical_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) { + case AND_z_p_zz: + mnemonic = "and"; + break; + case BIC_z_p_zz: + mnemonic = "bic"; + break; + case EOR_z_p_zz: + mnemonic = "eor"; + break; + case ORR_z_p_zz: + mnemonic = "orr"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftByImm_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, 'ITriSveq"; + unsigned tsize = (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(9, 8); + + if (tsize == 0) { + form = "(SVEBitwiseShiftByImm_Predicated)"; + } else { + switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) { + case ASRD_z_p_zi: + mnemonic = "asrd"; + break; + case ASR_z_p_zi: + mnemonic = "asr"; + break; + case LSL_z_p_zi: + mnemonic = "lsl"; + form = "'Zd.'tszp, p'u1210/m, 'Zd.'tszp, 'ITriSvep"; + break; + case LSR_z_p_zi: + mnemonic = "lsr"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftByVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { + case ASRR_z_p_zz: + mnemonic = "asrr"; + break; + case ASR_z_p_zz: + mnemonic = "asr"; + break; + case LSLR_z_p_zz: + mnemonic = "lslr"; + break; + case LSL_z_p_zz: + mnemonic = "lsl"; + break; + case LSRR_z_p_zz: + mnemonic = "lsrr"; + break; + case LSR_z_p_zz: + mnemonic = "lsr"; + break; + default: + form = "(SVEBitwiseShiftByVector_Predicated)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftByWideElements_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d"; + + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "(SVEBitwiseShiftByWideElements_Predicated)"; + } else { + switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { + case ASR_z_p_zw: + mnemonic = "asr"; + break; + case LSL_z_p_zw: + mnemonic = "lsl"; + break; + case LSR_z_p_zw: + mnemonic = "lsr"; + break; + default: + form = "(SVEBitwiseShiftByWideElements_Predicated)"; + break; + } + } + Format(instr, mnemonic, form); +} + +static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) { + VIXL_ASSERT(IsUintN(8 << lane_bytes_log2, value)); + + // Duplicate lane-sized value across double word. + switch (lane_bytes_log2) { + case 0: + value *= 0x0101010101010101; + break; + case 1: + value *= 0x0001000100010001; + break; + case 2: + value *= 0x0000000100000001; + break; + case 3: // Nothing to do + break; + default: + VIXL_UNREACHABLE(); + } + + if ((value & 0xff) == 0) { + // Check for 16-bit patterns. Set least-significant 16 bits, to make tests + // easier; we already checked least-significant byte is zero above. 
+ uint64_t generic_value = value | 0xffff; + + // Check 0x00000000_0000pq00 or 0xffffffff_ffffpq00. + if ((generic_value == 0xffff) || (generic_value == UINT64_MAX)) { + return false; + } + + // Check 0x0000pq00_0000pq00 or 0xffffpq00_ffffpq00. + uint64_t rotvalue = RotateRight(value, 32, 64); + if (value == rotvalue) { + generic_value &= 0xffffffff; + if ((generic_value == 0xffff) || (generic_value == UINT32_MAX)) { + return false; + } + } + + // Check 0xpq00pq00_pq00pq00. + rotvalue = RotateRight(value, 16, 64); + if (value == rotvalue) { + return false; + } + } else { + // Check for 8-bit patterns. Set least-significant byte, to make tests + // easier. + uint64_t generic_value = value | 0xff; + + // Check 0x00000000_000000pq or 0xffffffff_ffffffpq. + if ((generic_value == 0xff) || (generic_value == UINT64_MAX)) { + return false; + } + + // Check 0x000000pq_000000pq or 0xffffffpq_ffffffpq. + uint64_t rotvalue = RotateRight(value, 32, 64); + if (value == rotvalue) { + generic_value &= 0xffffffff; + if ((generic_value == 0xff) || (generic_value == UINT32_MAX)) { + return false; + } + } + + // Check 0x00pq00pq_00pq00pq or 0xffpqffpq_ffpqffpq. + rotvalue = RotateRight(value, 16, 64); + if (value == rotvalue) { + generic_value &= 0xffff; + if ((generic_value == 0xff) || (generic_value == UINT16_MAX)) { + return false; + } + } + + // Check 0xpqpqpqpq_pqpqpqpq. + rotvalue = RotateRight(value, 8, 64); + if (value == rotvalue) { + return false; + } + } + return true; +} + +void Disassembler::VisitSVEBroadcastBitmaskImm(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastBitmaskImm)"; + + switch (instr->Mask(SVEBroadcastBitmaskImmMask)) { + case DUPM_z_i: { + uint64_t imm = instr->GetSVEImmLogical(); + if (imm != 0) { + int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); + mnemonic = SVEMoveMaskPreferred(imm, lane_size) ? "mov" : "dupm"; + form = "'Zd.'tl, 'ITriSvel"; + } + break; + } + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastFPImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastFPImm_Unpredicated)"; + + switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { + case FDUP_z_i: + // The preferred disassembly for fdup is "fmov". + mnemonic = "fmov"; + form = "'Zd.'t, 'IFPSve"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastGeneralRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastGeneralRegister)"; + + switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) { + case DUP_z_r: + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Zd.'t, 'Xns"; + } else { + form = "'Zd.'t, 'Wns"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastIndexElement(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastIndexElement)"; + + switch (instr->Mask(SVEBroadcastIndexElementMask)) { + case DUP_z_zi: { + // The tsz field must not be zero. + int tsz = instr->ExtractBits(20, 16); + if (tsz != 0) { + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + int imm2 = instr->ExtractBits(23, 22); + if ((CountSetBits(imm2) + CountSetBits(tsz)) == 1) { + // If imm2:tsz has one set bit, the index is zero. 
This is + // disassembled as a mov from a b/h/s/d/q scalar register. + form = "'Zd.'tszx, 'tszx'u0905"; + } else { + form = "'Zd.'tszx, 'Zn.'tszx['IVInsSVEIndex]"; + } + } + break; + } + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastIntImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastIntImm_Unpredicated)"; + + switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) { + case DUP_z_i: + // The encoding of byte-sized lanes with lsl #8 is undefined. + if ((instr->GetSVEVectorFormat() == kFormatVnB) && + (instr->ExtractBit(13) == 1)) + break; + + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + form = (instr->ExtractBit(13) == 0) ? "'Zd.'t, #'s1205" + : "'Zd.'t, #'s1205, lsl #8"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECompressActiveElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECompressActiveElements)"; + + switch (instr->Mask(SVECompressActiveElementsMask)) { + case COMPACT_z_p_z: + // The top bit of size is always set for compact, so 't can only be + // substituted with types S and D. + VIXL_ASSERT(instr->ExtractBit(23) == 1); + mnemonic = "compact"; + form = "'Zd.'t, 'Pgl, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyBroadcastElementToVector( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) { + case CLASTA_z_p_zz: + mnemonic = "clasta"; + break; + case CLASTB_z_p_zz: + mnemonic = "clastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyExtractElementToGeneralRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Wd, 'Pgl, 'Wd, 'Zn.'t"; + + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Xd, p'u1210, 'Xd, 'Zn.'t"; + } + + switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) { + case CLASTA_r_p_z: + mnemonic = "clasta"; + break; + case CLASTB_r_p_z: + mnemonic = "clastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyExtractElementToSIMDFPScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; + + switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) { + case CLASTA_v_p_z: + mnemonic = "clasta"; + break; + case CLASTB_v_p_z: + mnemonic = "clastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConditionallyTerminateScalars( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBit(22) == 0) ? 
"'Wn, 'Wm" : "'Xn, 'Xm"; + + switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { + case CTERMEQ_rr: + mnemonic = "ctermeq"; + break; + case CTERMNE_rr: + mnemonic = "ctermne"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEConstructivePrefix_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEConstructivePrefix_Unpredicated)"; + + switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { + case MOVPRFX_z_z: + mnemonic = "movprfx"; + form = "'Zd, 'Zn"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + + bool rm_is_zr = instr->GetRm() == kZeroRegCode; + + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = NULL; + + switch (instr->Mask(SVEContiguousFirstFaultLoad_ScalarPlusScalarMask)) { + case LDFF1B_z_p_br_u16: + case LDFF1B_z_p_br_u32: + case LDFF1B_z_p_br_u64: + case LDFF1B_z_p_br_u8: + mnemonic = "ldff1b"; + suffix = rm_is_zr ? "]" : ", 'Xm]"; + break; + case LDFF1D_z_p_br_u64: + mnemonic = "ldff1d"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #3]"; + break; + case LDFF1H_z_p_br_u16: + case LDFF1H_z_p_br_u32: + case LDFF1H_z_p_br_u64: + mnemonic = "ldff1h"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; + break; + case LDFF1SB_z_p_br_s16: + case LDFF1SB_z_p_br_s32: + case LDFF1SB_z_p_br_s64: + mnemonic = "ldff1sb"; + suffix = rm_is_zr ? "]" : ", 'Xm]"; + break; + case LDFF1SH_z_p_br_s32: + case LDFF1SH_z_p_br_s64: + mnemonic = "ldff1sh"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]"; + break; + case LDFF1SW_z_p_br_s64: + mnemonic = "ldff1sw"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]"; + break; + case LDFF1W_z_p_br_u32: + case LDFF1W_z_p_br_u64: + mnemonic = "ldff1w"; + suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]"; + break; + default: + form = "(SVEContiguousFirstFaultLoad_ScalarPlusScalar)"; + break; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; + + switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) { + case LDNF1B_z_p_bi_u16: + case LDNF1B_z_p_bi_u32: + case LDNF1B_z_p_bi_u64: + case LDNF1B_z_p_bi_u8: + mnemonic = "ldnf1b"; + break; + case LDNF1D_z_p_bi_u64: + mnemonic = "ldnf1d"; + break; + case LDNF1H_z_p_bi_u16: + case LDNF1H_z_p_bi_u32: + case LDNF1H_z_p_bi_u64: + mnemonic = "ldnf1h"; + break; + case LDNF1SB_z_p_bi_s16: + case LDNF1SB_z_p_bi_s32: + case LDNF1SB_z_p_bi_s64: + mnemonic = "ldnf1sb"; + break; + case LDNF1SH_z_p_bi_s32: + case LDNF1SH_z_p_bi_s64: + mnemonic = "ldnf1sh"; + break; + case LDNF1SW_z_p_bi_s64: + mnemonic = "ldnf1sw"; + break; + case LDNF1W_z_p_bi_u32: + case LDNF1W_z_p_bi_u64: + mnemonic = "ldnf1w"; + break; + default: + form = "(SVEContiguousNonFaultLoad_ScalarPlusImm)"; + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusImm)"; + + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) { + case LDNT1B_z_p_bi_contiguous: + mnemonic = "ldnt1b"; + form = "{'Zt.b}, 'Pgl/z, ['Xns"; + break; + case LDNT1D_z_p_bi_contiguous: + mnemonic = "ldnt1d"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + break; + case LDNT1H_z_p_bi_contiguous: + mnemonic = "ldnt1h"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + break; + case LDNT1W_z_p_bi_contiguous: + mnemonic = "ldnt1w"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusScalar)"; + + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) { + case LDNT1B_z_p_br_contiguous: + mnemonic = "ldnt1b"; + form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; + break; + case LDNT1D_z_p_br_contiguous: + mnemonic = "ldnt1d"; + form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; + break; + case LDNT1H_z_p_br_contiguous: + mnemonic = "ldnt1h"; + form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; + break; + case LDNT1W_z_p_br_contiguous: + mnemonic = "ldnt1w"; + form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusImm)"; + + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) { + case STNT1B_z_p_bi_contiguous: + mnemonic = "stnt1b"; + form = "{'Zt.b}, 'Pgl, ['Xns"; + break; + case STNT1D_z_p_bi_contiguous: + mnemonic = "stnt1d"; + form = "{'Zt.d}, 'Pgl, ['Xns"; + break; + case STNT1H_z_p_bi_contiguous: + mnemonic = "stnt1h"; + form = "{'Zt.h}, 'Pgl, ['Xns"; + break; + case STNT1W_z_p_bi_contiguous: + mnemonic = "stnt1w"; + form = "{'Zt.s}, 'Pgl, ['Xns"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusScalar)"; + + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) { + case STNT1B_z_p_br_contiguous: + mnemonic = "stnt1b"; + form = "{'Zt.b}, 'Pgl, ['Xns, 'Rm]"; + break; + case STNT1D_z_p_br_contiguous: + mnemonic = "stnt1d"; + form = "{'Zt.d}, 'Pgl, ['Xns, 'Rm, lsl #3]"; + break; + case STNT1H_z_p_br_contiguous: + mnemonic = "stnt1h"; + form = "{'Zt.h}, 'Pgl, ['Xns, 'Rm, lsl #1]"; + break; + case STNT1W_z_p_br_contiguous: + mnemonic = "stnt1w"; + form = "{'Zt.s}, 'Pgl, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBits(21, 16) != 0) + ? 
"'prefSVEOp, 'Pgl, ['Xns, #'s2116, mul vl]" + : "'prefSVEOp, 'Pgl, ['Xns]"; + + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) { + case PRFB_i_p_bi_s: + mnemonic = "prfb"; + break; + case PRFD_i_p_bi_s: + mnemonic = "prfd"; + break; + case PRFH_i_p_bi_s: + mnemonic = "prfh"; + break; + case PRFW_i_p_bi_s: + mnemonic = "prfw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousPrefetch_ScalarPlusScalar)"; + + if (instr->GetRm() != kZeroRegCode) { + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) { + case PRFB_i_p_br_s: + mnemonic = "prfb"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm]"; + break; + case PRFD_i_p_br_s: + mnemonic = "prfd"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #3]"; + break; + case PRFH_i_p_br_s: + mnemonic = "prfh"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #1]"; + break; + case PRFW_i_p_br_s: + mnemonic = "prfw"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousStore_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + + // The 'size' field isn't in the usual place here. + const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, #'s1916, mul vl]"; + if (instr->ExtractBits(19, 16) == 0) { + form = "{'Zt.'tls}, 'Pgl, ['Xns]"; + } + + switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) { + case ST1B_z_p_bi: + mnemonic = "st1b"; + break; + case ST1D_z_p_bi: + mnemonic = "st1d"; + break; + case ST1H_z_p_bi: + mnemonic = "st1h"; + break; + case ST1W_z_p_bi: + mnemonic = "st1w"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousStore_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + + // The 'size' field isn't in the usual place here. + const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]"; + + switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) { + case ST1B_z_p_br: + mnemonic = "st1b"; + break; + case ST1D_z_p_br: + mnemonic = "st1d"; + break; + case ST1H_z_p_br: + mnemonic = "st1h"; + break; + case ST1W_z_p_br: + mnemonic = "st1w"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyFPImm_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyFPImm_Predicated)"; + + switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: + // The preferred disassembly for fcpy is "fmov". + mnemonic = "fmov"; + form = "'Zd.'t, 'Pm/m, 'IFPSve"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyGeneralRegisterToVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyGeneralRegisterToVector_Predicated)"; + + switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { + case CPY_z_p_r: + // The preferred disassembly for cpy is "mov". 
+ mnemonic = "mov"; + form = "'Zd.'t, 'Pgl/m, 'Wns"; + if (instr->GetSVESize() == kXRegSizeInBytesLog2) { + form = "'Zd.'t, 'Pgl/m, 'Xns"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyIntImm_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyIntImm_Predicated)"; + const char *suffix = NULL; + + switch (instr->Mask(SVECopyIntImm_PredicatedMask)) { + case CPY_z_p_i: { + // The preferred disassembly for cpy is "mov". + mnemonic = "mov"; + form = "'Zd.'t, 'Pm/'?14:mz, #'s1205"; + if (instr->ExtractBit(13) != 0) suffix = ", lsl #8"; + break; + } + default: + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopySIMDFPScalarRegisterToVector_Predicated)"; + + switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { + case CPY_z_p_v: + // The preferred disassembly for cpy is "mov". + mnemonic = "mov"; + form = "'Zd.'t, 'Pgl/m, 'Vnv"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEExtractElementToGeneralRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Wd, 'Pgl, 'Zn.'t"; + + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Xd, p'u1210, 'Zn.'t"; + } + + switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) { + case LASTA_r_p_z: + mnemonic = "lasta"; + break; + case LASTB_r_p_z: + mnemonic = "lastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEExtractElementToSIMDFPScalarRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; + + switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) { + case LASTA_v_p_z: + mnemonic = "lasta"; + break; + case LASTB_v_p_z: + mnemonic = "lastb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFFRInitialise(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFFRInitialise)"; + + switch (instr->Mask(SVEFFRInitialiseMask)) { + case SETFFR_f: + mnemonic = "setffr"; + form = " "; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFFRWriteFromPredicate(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFFRWriteFromPredicate)"; + + switch (instr->Mask(SVEFFRWriteFromPredicateMask)) { + case WRFFR_f_p: + mnemonic = "wrffr"; + form = "'Pn.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPArithmeticWithImm_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form00 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.0"; + const char *form05 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.5"; + const char *form10 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #1.0"; + const char *form20 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #2.0"; + int i1 = instr->ExtractBit(5); + const char *form = i1 ? form10 : form00; + + switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) { + case FADD_z_p_zs: + mnemonic = "fadd"; + form = i1 ? 
form10 : form05; + break; + case FMAXNM_z_p_zs: + mnemonic = "fmaxnm"; + break; + case FMAX_z_p_zs: + mnemonic = "fmax"; + break; + case FMINNM_z_p_zs: + mnemonic = "fminnm"; + break; + case FMIN_z_p_zs: + mnemonic = "fmin"; + break; + case FMUL_z_p_zs: + mnemonic = "fmul"; + form = i1 ? form20 : form05; + break; + case FSUBR_z_p_zs: + mnemonic = "fsubr"; + form = i1 ? form10 : form05; + break; + case FSUB_z_p_zs: + mnemonic = "fsub"; + form = i1 ? form10 : form05; + break; + default: + form = "(SVEFPArithmeticWithImm_Predicated)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPArithmetic_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { + case FABD_z_p_zz: + mnemonic = "fabd"; + break; + case FADD_z_p_zz: + mnemonic = "fadd"; + break; + case FDIVR_z_p_zz: + mnemonic = "fdivr"; + break; + case FDIV_z_p_zz: + mnemonic = "fdiv"; + break; + case FMAXNM_z_p_zz: + mnemonic = "fmaxnm"; + break; + case FMAX_z_p_zz: + mnemonic = "fmax"; + break; + case FMINNM_z_p_zz: + mnemonic = "fminnm"; + break; + case FMIN_z_p_zz: + mnemonic = "fmin"; + break; + case FMULX_z_p_zz: + mnemonic = "fmulx"; + break; + case FMUL_z_p_zz: + mnemonic = "fmul"; + break; + case FSCALE_z_p_zz: + mnemonic = "fscale"; + break; + case FSUBR_z_p_zz: + mnemonic = "fsubr"; + break; + case FSUB_z_p_zz: + mnemonic = "fsub"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPConvertPrecision(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPConvertPrecision)"; + + switch (instr->Mask(SVEFPConvertPrecisionMask)) { + case FCVT_z_p_z_d2h: + mnemonic = "fcvt"; + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case FCVT_z_p_z_d2s: + mnemonic = "fcvt"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case FCVT_z_p_z_h2d: + mnemonic = "fcvt"; + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case FCVT_z_p_z_h2s: + mnemonic = "fcvt"; + form = "'Zd.s, 'Pgl/m, 'Zn.h"; + break; + case FCVT_z_p_z_s2d: + mnemonic = "fcvt"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case FCVT_z_p_z_s2h: + mnemonic = "fcvt"; + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPConvertToInt(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPConvertToInt)"; + + switch (instr->Mask(SVEFPConvertToIntMask)) { + case FCVTZS_z_p_z_d2w: + mnemonic = "fcvtzs"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case FCVTZS_z_p_z_d2x: + mnemonic = "fcvtzs"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case FCVTZS_z_p_z_fp162h: + mnemonic = "fcvtzs"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case FCVTZS_z_p_z_fp162w: + mnemonic = "fcvtzs"; + form = "'Zd.s, 'Pgl/m, 'Zn.h"; + break; + case FCVTZS_z_p_z_fp162x: + mnemonic = "fcvtzs"; + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case FCVTZS_z_p_z_s2w: + mnemonic = "fcvtzs"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case FCVTZS_z_p_z_s2x: + mnemonic = "fcvtzs"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case FCVTZU_z_p_z_d2w: + mnemonic = "fcvtzu"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case FCVTZU_z_p_z_d2x: + mnemonic = "fcvtzu"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case FCVTZU_z_p_z_fp162h: + mnemonic = "fcvtzu"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case FCVTZU_z_p_z_fp162w: + mnemonic = "fcvtzu"; + form = "'Zd.s, 'Pgl/m, 'Zn.h"; 
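// Annotation, not part of this commit: the encoding names follow a
// <source>2<destination> convention: _d2w is double to 32-bit word, _s2x is
// single to 64-bit (x) integer, and _fp162h/_fp162w/_fp162x are half
// precision to 16-, 32- and 64-bit integers, which is why this fp162w case
// pairs a .s destination with a .h source.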
+ break; + case FCVTZU_z_p_z_fp162x: + mnemonic = "fcvtzu"; + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case FCVTZU_z_p_z_s2w: + mnemonic = "fcvtzu"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case FCVTZU_z_p_z_s2x: + mnemonic = "fcvtzu"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPExponentialAccelerator(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPExponentialAccelerator)"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEFPExponentialAcceleratorMask)) { + case FEXPA_z_z: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "fexpa"; + form = "'Zd.'t, 'Zn.'t"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPRoundToIntegralValue(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { + case FRINTA_z_p_z: + mnemonic = "frinta"; + break; + case FRINTI_z_p_z: + mnemonic = "frinti"; + break; + case FRINTM_z_p_z: + mnemonic = "frintm"; + break; + case FRINTN_z_p_z: + mnemonic = "frintn"; + break; + case FRINTP_z_p_z: + mnemonic = "frintp"; + break; + case FRINTX_z_p_z: + mnemonic = "frintx"; + break; + case FRINTZ_z_p_z: + mnemonic = "frintz"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPTrigMulAddCoefficient(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPTrigMulAddCoefficient)"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { + case FTMAD_z_zzi: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "ftmad"; + form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPTrigSelectCoefficient(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPTrigSelectCoefficient)"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) { + case FTSSEL_z_zz: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "ftssel"; + form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPUnaryOp(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + if (instr->GetSVESize() == kBRegSizeInBytesLog2) { + form = "(SVEFPUnaryOp)"; + } else { + switch (instr->Mask(SVEFPUnaryOpMask)) { + case FRECPX_z_p_z: + mnemonic = "frecpx"; + break; + case FSQRT_z_p_z: + mnemonic = "fsqrt"; + break; + default: + form = "(SVEFPUnaryOp)"; + break; + } + } + Format(instr, mnemonic, form); +} + +static const char *IncDecFormHelper(const Instruction *instr, + const char *reg_pat_mul_form, + const char *reg_pat_form, + const char *reg_form) { + if (instr->ExtractBits(19, 16) == 0) { + if (instr->ExtractBits(9, 5) == SVE_ALL) { + // Use the register only form if the multiplier is one (encoded as zero) + // and the pattern is SVE_ALL. + return reg_form; + } + // Use the register and pattern form if the multiplier is one. 
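// Annotation, not part of this commit: the three forms map onto the
// assembler's optional operands, e.g. for incd: "incd x0" (pattern all,
// multiplier 1), "incd x0, vl4" (explicit pattern, multiplier 1) and
// "incd x0, vl4, mul #2". Bits 19:16 hold the multiplier minus one, hence
// the "mul #'u1916+1" substitution in the callers below.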
+ return reg_pat_form; + } + return reg_pat_mul_form; +} + +void Disassembler::VisitSVEIncDecRegisterByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = + IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); + + switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) { + case DECB_r_rs: + mnemonic = "decb"; + break; + case DECD_r_rs: + mnemonic = "decd"; + break; + case DECH_r_rs: + mnemonic = "dech"; + break; + case DECW_r_rs: + mnemonic = "decw"; + break; + case INCB_r_rs: + mnemonic = "incb"; + break; + case INCD_r_rs: + mnemonic = "incd"; + break; + case INCH_r_rs: + mnemonic = "inch"; + break; + case INCW_r_rs: + mnemonic = "incw"; + break; + default: + form = "(SVEIncDecRegisterByElementCount)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIncDecVectorByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = IncDecFormHelper(instr, + "'Zd.'t, 'Ipc, mul #'u1916+1", + "'Zd.'t, 'Ipc", + "'Zd.'t"); + + switch (instr->Mask(SVEIncDecVectorByElementCountMask)) { + case DECD_z_zs: + mnemonic = "decd"; + break; + case DECH_z_zs: + mnemonic = "dech"; + break; + case DECW_z_zs: + mnemonic = "decw"; + break; + case INCD_z_zs: + mnemonic = "incd"; + break; + case INCH_z_zs: + mnemonic = "inch"; + break; + case INCW_z_zs: + mnemonic = "incw"; + break; + default: + form = "(SVEIncDecVectorByElementCount)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEInsertGeneralRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEInsertGeneralRegister)"; + + switch (instr->Mask(SVEInsertGeneralRegisterMask)) { + case INSR_z_r: + mnemonic = "insr"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Zd.'t, 'Xn"; + } else { + form = "'Zd.'t, 'Wn"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEInsertSIMDFPScalarRegister( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEInsertSIMDFPScalarRegister)"; + + switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) { + case INSR_z_v: + mnemonic = "insr"; + form = "'Zd.'t, 'Vnv"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntAddSubtractImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = (instr->ExtractBit(13) == 0) + ? 
"'Zd.'t, 'Zd.'t, #'u1205" + : "'Zd.'t, 'Zd.'t, #'u1205, lsl #8"; + + switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { + case ADD_z_zi: + mnemonic = "add"; + break; + case SQADD_z_zi: + mnemonic = "sqadd"; + break; + case SQSUB_z_zi: + mnemonic = "sqsub"; + break; + case SUBR_z_zi: + mnemonic = "subr"; + break; + case SUB_z_zi: + mnemonic = "sub"; + break; + case UQADD_z_zi: + mnemonic = "uqadd"; + break; + case UQSUB_z_zi: + mnemonic = "uqsub"; + break; + default: + form = "(SVEIntAddSubtractImm_Unpredicated)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntAddSubtractVectors_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) { + case ADD_z_p_zz: + mnemonic = "add"; + break; + case SUBR_z_p_zz: + mnemonic = "subr"; + break; + case SUB_z_p_zz: + mnemonic = "sub"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareScalarCountAndLimit( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = + (instr->ExtractBit(12) == 0) ? "'Pd.'t, 'Wn, 'Wm" : "'Pd.'t, 'Xn, 'Xm"; + + switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { + case WHILELE_p_p_rr: + mnemonic = "whilele"; + break; + case WHILELO_p_p_rr: + mnemonic = "whilelo"; + break; + case WHILELS_p_p_rr: + mnemonic = "whilels"; + break; + case WHILELT_p_p_rr: + mnemonic = "whilelt"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntConvertToFP(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntConvertToFP)"; + + switch (instr->Mask(SVEIntConvertToFPMask)) { + case SCVTF_z_p_z_h2fp16: + mnemonic = "scvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case SCVTF_z_p_z_w2d: + mnemonic = "scvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case SCVTF_z_p_z_w2fp16: + mnemonic = "scvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + case SCVTF_z_p_z_w2s: + mnemonic = "scvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case SCVTF_z_p_z_x2d: + mnemonic = "scvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case SCVTF_z_p_z_x2fp16: + mnemonic = "scvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case SCVTF_z_p_z_x2s: + mnemonic = "scvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case UCVTF_z_p_z_h2fp16: + mnemonic = "ucvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case UCVTF_z_p_z_w2d: + mnemonic = "ucvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case UCVTF_z_p_z_w2fp16: + mnemonic = "ucvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + case UCVTF_z_p_z_w2s: + mnemonic = "ucvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case UCVTF_z_p_z_x2d: + mnemonic = "ucvtf"; + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case UCVTF_z_p_z_x2fp16: + mnemonic = "ucvtf"; + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case UCVTF_z_p_z_x2s: + mnemonic = "ucvtf"; + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntDivideVectors_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + mnemonic = "sdivr"; + break; + case SDIV_z_p_zz: + mnemonic = "sdiv"; + break; + case UDIVR_z_p_zz: + mnemonic = "udivr"; + break; + case 
UDIV_z_p_zz: + mnemonic = "udiv"; + break; + default: + break; + } + + switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + case SDIV_z_p_zz: + case UDIVR_z_p_zz: + case UDIV_z_p_zz: + switch (instr->GetSVESize()) { + case kBRegSizeInBytesLog2: + case kHRegSizeInBytesLog2: + mnemonic = "unimplemented"; + form = "(SVEIntBinaryArithmeticPredicated)"; + break; + case kSRegSizeInBytesLog2: + case kDRegSizeInBytesLog2: + // The default form works for these instructions. + break; + default: + // GetSVESize() should never return other values. + VIXL_UNREACHABLE(); + break; + } + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMinMaxDifference_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) { + case SABD_z_p_zz: + mnemonic = "sabd"; + break; + case SMAX_z_p_zz: + mnemonic = "smax"; + break; + case SMIN_z_p_zz: + mnemonic = "smin"; + break; + case UABD_z_p_zz: + mnemonic = "uabd"; + break; + case UMAX_z_p_zz: + mnemonic = "umax"; + break; + case UMIN_z_p_zz: + mnemonic = "umin"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMinMaxImm_Unpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zd.'t, #'u1205"; + + switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) { + case SMAX_z_zi: + mnemonic = "smax"; + form = "'Zd.'t, 'Zd.'t, #'s1205"; + break; + case SMIN_z_zi: + mnemonic = "smin"; + form = "'Zd.'t, 'Zd.'t, #'s1205"; + break; + case UMAX_z_zi: + mnemonic = "umax"; + break; + case UMIN_z_zi: + mnemonic = "umin"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulImm_Unpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntMulImm_Unpredicated)"; + + switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) { + case MUL_z_zi: + mnemonic = "mul"; + form = "'Zd.'t, 'Zd.'t, #'s1205"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulVectors_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) { + case MUL_z_p_zz: + mnemonic = "mul"; + break; + case SMULH_z_p_zz: + mnemonic = "smulh"; + break; + case UMULH_z_p_zz: + mnemonic = "umulh"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadAndBroadcastElement)"; + const char *suffix_b = ", #'u2116]"; + const char *suffix_h = ", #'u2116*2]"; + const char *suffix_w = ", #'u2116*4]"; + const char *suffix_d = ", #'u2116*8]"; + const char *suffix = NULL; + + switch (instr->Mask(SVELoadAndBroadcastElementMask)) { + case LD1RB_z_p_bi_u16: + mnemonic = "ld1rb"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RB_z_p_bi_u32: + mnemonic = "ld1rb"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RB_z_p_bi_u64: + mnemonic = "ld1rb"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RB_z_p_bi_u8: + mnemonic = "ld1rb"; + form = "{'Zt.b}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RD_z_p_bi_u64: + 
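+ // The six-bit immediate (bits 21:16) is scaled by the access size, hence the
+ // *2/*4/*8 factors in the H/W/D suffixes; an encoded offset of 63, for
+ // example, disassembles as "ld1rd {z0.d}, p0/z, [x1, #504]".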
mnemonic = "ld1rd"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_d; + break; + case LD1RH_z_p_bi_u16: + mnemonic = "ld1rh"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RH_z_p_bi_u32: + mnemonic = "ld1rh"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RH_z_p_bi_u64: + mnemonic = "ld1rh"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RSB_z_p_bi_s16: + mnemonic = "ld1rsb"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RSB_z_p_bi_s32: + mnemonic = "ld1rsb"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RSB_z_p_bi_s64: + mnemonic = "ld1rsb"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case LD1RSH_z_p_bi_s32: + mnemonic = "ld1rsh"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RSH_z_p_bi_s64: + mnemonic = "ld1rsh"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case LD1RSW_z_p_bi_s64: + mnemonic = "ld1rsw"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + case LD1RW_z_p_bi_u32: + mnemonic = "ld1rw"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + case LD1RW_z_p_bi_u64: + mnemonic = "ld1rw"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + default: + break; + } + + // Hide curly brackets if immediate is zero. + if (instr->ExtractBits(21, 16) == 0) { + suffix = "]"; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusImm)"; + + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916*16]"; + + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { + case LD1RQB_z_p_bi_u8: + mnemonic = "ld1rqb"; + form = "{'Zt.b}, 'Pgl/z, ['Xns"; + break; + case LD1RQD_z_p_bi_u64: + mnemonic = "ld1rqd"; + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + break; + case LD1RQH_z_p_bi_u16: + mnemonic = "ld1rqh"; + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + break; + case LD1RQW_z_p_bi_u32: + mnemonic = "ld1rqw"; + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusScalar)"; + + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { + case LD1RQB_z_p_br_contiguous: + mnemonic = "ld1rqb"; + form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; + break; + case LD1RQD_z_p_br_contiguous: + mnemonic = "ld1rqd"; + form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; + break; + case LD1RQH_z_p_br_contiguous: + mnemonic = "ld1rqh"; + form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; + break; + case LD1RQW_z_p_br_contiguous: + mnemonic = "ld1rqw"; + form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadMultipleStructures_ScalarPlusImm)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl/z, ['Xns'ISveSvl]"; + + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) { + case LD2B_z_p_bi_contiguous: + mnemonic = "ld2b"; + form = form_2; + break; + case LD2D_z_p_bi_contiguous: + mnemonic = "ld2d"; + form = form_2; + break; + case LD2H_z_p_bi_contiguous: + mnemonic = "ld2h"; + form = form_2; + break; + case LD2W_z_p_bi_contiguous: + mnemonic = "ld2w"; + form = form_2; + break; + case LD3B_z_p_bi_contiguous: + mnemonic = "ld3b"; + form = form_3; + break; + case LD3D_z_p_bi_contiguous: + mnemonic = "ld3d"; + form = form_3; + break; + case LD3H_z_p_bi_contiguous: + mnemonic = "ld3h"; + form = form_3; + break; + case LD3W_z_p_bi_contiguous: + mnemonic = "ld3w"; + form = form_3; + break; + case LD4B_z_p_bi_contiguous: + mnemonic = "ld4b"; + form = form_4; + break; + case LD4D_z_p_bi_contiguous: + mnemonic = "ld4d"; + form = form_4; + break; + case LD4H_z_p_bi_contiguous: + mnemonic = "ld4h"; + form = form_4; + break; + case LD4W_z_p_bi_contiguous: + mnemonic = "ld4w"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadMultipleStructures_ScalarPlusScalar)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl/z, ['Xns, 'Xm'NSveS]"; + + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) { + case LD2B_z_p_br_contiguous: + mnemonic = "ld2b"; + form = form_2; + break; + case 
LD2D_z_p_br_contiguous: + mnemonic = "ld2d"; + form = form_2; + break; + case LD2H_z_p_br_contiguous: + mnemonic = "ld2h"; + form = form_2; + break; + case LD2W_z_p_br_contiguous: + mnemonic = "ld2w"; + form = form_2; + break; + case LD3B_z_p_br_contiguous: + mnemonic = "ld3b"; + form = form_3; + break; + case LD3D_z_p_br_contiguous: + mnemonic = "ld3d"; + form = form_3; + break; + case LD3H_z_p_br_contiguous: + mnemonic = "ld3h"; + form = form_3; + break; + case LD3W_z_p_br_contiguous: + mnemonic = "ld3w"; + form = form_3; + break; + case LD4B_z_p_br_contiguous: + mnemonic = "ld4b"; + form = form_4; + break; + case LD4D_z_p_br_contiguous: + mnemonic = "ld4d"; + form = form_4; + break; + case LD4H_z_p_br_contiguous: + mnemonic = "ld4h"; + form = form_4; + break; + case LD4W_z_p_br_contiguous: + mnemonic = "ld4w"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadPredicateRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadPredicateRegister)"; + + switch (instr->Mask(SVELoadPredicateRegisterMask)) { + case LDR_p_bi: + mnemonic = "ldr"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; + } else { + form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVELoadVectorRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVELoadVectorRegister)"; + + switch (instr->Mask(SVELoadVectorRegisterMask)) { + case LDR_z_bi: + mnemonic = "ldr"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; + } else { + form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPartitionBreakCondition(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/'?04:mz, 'Pn.b"; + + switch (instr->Mask(SVEPartitionBreakConditionMask)) { + case BRKAS_p_p_p_z: + mnemonic = "brkas"; + break; + case BRKA_p_p_p: + mnemonic = "brka"; + break; + case BRKBS_p_p_p_z: + mnemonic = "brkbs"; + break; + case BRKB_p_p_p: + mnemonic = "brkb"; + break; + default: + form = "(SVEPartitionBreakCondition)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPermutePredicateElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pn.'t, 'Pm.'t"; + + switch (instr->Mask(SVEPermutePredicateElementsMask)) { + case TRN1_p_pp: + mnemonic = "trn1"; + break; + case TRN2_p_pp: + mnemonic = "trn2"; + break; + case UZP1_p_pp: + mnemonic = "uzp1"; + break; + case UZP2_p_pp: + mnemonic = "uzp2"; + break; + case ZIP1_p_pp: + mnemonic = "zip1"; + break; + case ZIP2_p_pp: + mnemonic = "zip2"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateFirstActive(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateFirstActive)"; + + switch (instr->Mask(SVEPredicateFirstActiveMask)) { + case PFIRST_p_p_p: + mnemonic = "pfirst"; + form = "'Pd.b, 'Pn, 'Pd.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateReadFromFFR_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateReadFromFFR_Unpredicated)"; + + switch 
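+ // In the ldr forms above, the 0x003f1c00 test checks the split nine-bit
+ // offset (imm9h in bits 21:16, imm9l in bits 12:10) and omits the
+ // ", #imm, mul vl" part when it is zero; 'Zd and 'Zt both name bits 4:0.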
(instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) { + case RDFFR_p_f: + mnemonic = "rdffr"; + form = "'Pd.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateTest(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateTest)"; + + switch (instr->Mask(SVEPredicateTestMask)) { + case PTEST_p_p: + mnemonic = "ptest"; + form = "p'u1310, 'Pn.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateZero(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateZero)"; + + switch (instr->Mask(SVEPredicateZeroMask)) { + case PFALSE_p: + mnemonic = "pfalse"; + form = "'Pd.b"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPropagateBreakToNextPartition( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b"; + + switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) { + case BRKNS_p_p_pp: + mnemonic = "brkns"; + break; + case BRKN_p_p_pp: + mnemonic = "brkn"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEReversePredicateElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEReversePredicateElements)"; + + switch (instr->Mask(SVEReversePredicateElementsMask)) { + case REV_p_p: + mnemonic = "rev"; + form = "'Pd.'t, 'Pn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEReverseVectorElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEReverseVectorElements)"; + + switch (instr->Mask(SVEReverseVectorElementsMask)) { + case REV_z_z: + mnemonic = "rev"; + form = "'Zd.'t, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEReverseWithinElementsMask)) { + case RBIT_z_p_z: + mnemonic = "rbit"; + break; + case REVB_z_z: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "revb"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + case REVH_z_z: + if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) { + mnemonic = "revh"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + case REVW_z_z: + if (size == kDRegSizeInBytesLog2) { + mnemonic = "revw"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = IncDecFormHelper(instr, + "'R20d, 'Ipc, mul #'u1916+1", + "'R20d, 'Ipc", + "'R20d"); + const char *form_sx = IncDecFormHelper(instr, + "'Xd, 'Wd, 'Ipc, mul #'u1916+1", + "'Xd, 'Wd, 'Ipc", + "'Xd, 'Wd"); + + switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) { + case SQDECB_r_rs_sx: + mnemonic = "sqdecb"; + form = form_sx; + break; + case SQDECD_r_rs_sx: + mnemonic = "sqdecd"; + form = form_sx; + break; + case SQDECH_r_rs_sx: + mnemonic = "sqdech"; + form = 
form_sx; + break; + case SQDECW_r_rs_sx: + mnemonic = "sqdecw"; + form = form_sx; + break; + case SQINCB_r_rs_sx: + mnemonic = "sqincb"; + form = form_sx; + break; + case SQINCD_r_rs_sx: + mnemonic = "sqincd"; + form = form_sx; + break; + case SQINCH_r_rs_sx: + mnemonic = "sqinch"; + form = form_sx; + break; + case SQINCW_r_rs_sx: + mnemonic = "sqincw"; + form = form_sx; + break; + case SQDECB_r_rs_x: + mnemonic = "sqdecb"; + break; + case SQDECD_r_rs_x: + mnemonic = "sqdecd"; + break; + case SQDECH_r_rs_x: + mnemonic = "sqdech"; + break; + case SQDECW_r_rs_x: + mnemonic = "sqdecw"; + break; + case SQINCB_r_rs_x: + mnemonic = "sqincb"; + break; + case SQINCD_r_rs_x: + mnemonic = "sqincd"; + break; + case SQINCH_r_rs_x: + mnemonic = "sqinch"; + break; + case SQINCW_r_rs_x: + mnemonic = "sqincw"; + break; + case UQDECB_r_rs_uw: + case UQDECB_r_rs_x: + mnemonic = "uqdecb"; + break; + case UQDECD_r_rs_uw: + case UQDECD_r_rs_x: + mnemonic = "uqdecd"; + break; + case UQDECH_r_rs_uw: + case UQDECH_r_rs_x: + mnemonic = "uqdech"; + break; + case UQDECW_r_rs_uw: + case UQDECW_r_rs_x: + mnemonic = "uqdecw"; + break; + case UQINCB_r_rs_uw: + case UQINCB_r_rs_x: + mnemonic = "uqincb"; + break; + case UQINCD_r_rs_uw: + case UQINCD_r_rs_x: + mnemonic = "uqincd"; + break; + case UQINCH_r_rs_uw: + case UQINCH_r_rs_x: + mnemonic = "uqinch"; + break; + case UQINCW_r_rs_uw: + case UQINCW_r_rs_x: + mnemonic = "uqincw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVESaturatingIncDecVectorByElementCount( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = IncDecFormHelper(instr, + "'Zd.'t, 'Ipc, mul #'u1916+1", + "'Zd.'t, 'Ipc", + "'Zd.'t"); + + switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) { + case SQDECD_z_zs: + mnemonic = "sqdecd"; + break; + case SQDECH_z_zs: + mnemonic = "sqdech"; + break; + case SQDECW_z_zs: + mnemonic = "sqdecw"; + break; + case SQINCD_z_zs: + mnemonic = "sqincd"; + break; + case SQINCH_z_zs: + mnemonic = "sqinch"; + break; + case SQINCW_z_zs: + mnemonic = "sqincw"; + break; + case UQDECD_z_zs: + mnemonic = "uqdecd"; + break; + case UQDECH_z_zs: + mnemonic = "uqdech"; + break; + case UQDECW_z_zs: + mnemonic = "uqdecw"; + break; + case UQINCD_z_zs: + mnemonic = "uqincd"; + break; + case UQINCH_z_zs: + mnemonic = "uqinch"; + break; + case UQINCW_z_zs: + mnemonic = "uqincw"; + break; + default: + form = "(SVEElementCount)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStoreMultipleStructures_ScalarPlusImm)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns'ISveSvl]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl, ['Xns'ISveSvl]"; + + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) { + case ST2B_z_p_bi_contiguous: + mnemonic = "st2b"; + form = form_2; + break; + case ST2H_z_p_bi_contiguous: + mnemonic = "st2h"; + form = form_2; + break; + case ST2W_z_p_bi_contiguous: + mnemonic = "st2w"; + form = form_2; + break; + case ST2D_z_p_bi_contiguous: + mnemonic = "st2d"; + form = form_2; + break; + case ST3B_z_p_bi_contiguous: + mnemonic = "st3b"; + form = form_3; + break; + case ST3H_z_p_bi_contiguous: + mnemonic = "st3h"; + form = form_3; + break; + case 
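+ // 'ISveSvl prints the signed four-bit offset as a multiple of the vector
+ // length, e.g. "st2b {z0.b, z1.b}, p0, [x1, #4, mul vl]".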
ST3W_z_p_bi_contiguous: + mnemonic = "st3w"; + form = form_3; + break; + case ST3D_z_p_bi_contiguous: + mnemonic = "st3d"; + form = form_3; + break; + case ST4B_z_p_bi_contiguous: + mnemonic = "st4b"; + form = form_4; + break; + case ST4H_z_p_bi_contiguous: + mnemonic = "st4h"; + form = form_4; + break; + case ST4W_z_p_bi_contiguous: + mnemonic = "st4w"; + form = form_4; + break; + case ST4D_z_p_bi_contiguous: + mnemonic = "st4d"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStoreMultipleStructures_ScalarPlusScalar)"; + + const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]"; + const char *form_3 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]"; + const char *form_4 = + "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, " + "'Pgl, ['Xns, 'Xm'NSveS]"; + + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) { + case ST2B_z_p_br_contiguous: + mnemonic = "st2b"; + form = form_2; + break; + case ST2D_z_p_br_contiguous: + mnemonic = "st2d"; + form = form_2; + break; + case ST2H_z_p_br_contiguous: + mnemonic = "st2h"; + form = form_2; + break; + case ST2W_z_p_br_contiguous: + mnemonic = "st2w"; + form = form_2; + break; + case ST3B_z_p_br_contiguous: + mnemonic = "st3b"; + form = form_3; + break; + case ST3D_z_p_br_contiguous: + mnemonic = "st3d"; + form = form_3; + break; + case ST3H_z_p_br_contiguous: + mnemonic = "st3h"; + form = form_3; + break; + case ST3W_z_p_br_contiguous: + mnemonic = "st3w"; + form = form_3; + break; + case ST4B_z_p_br_contiguous: + mnemonic = "st4b"; + form = form_4; + break; + case ST4D_z_p_br_contiguous: + mnemonic = "st4d"; + form = form_4; + break; + case ST4H_z_p_br_contiguous: + mnemonic = "st4h"; + form = form_4; + break; + case ST4W_z_p_br_contiguous: + mnemonic = "st4w"; + form = form_4; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStorePredicateRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStorePredicateRegister)"; + + switch (instr->Mask(SVEStorePredicateRegisterMask)) { + case STR_p_bi: + mnemonic = "str"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; + } else { + form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStoreVectorRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStoreVectorRegister)"; + + switch (instr->Mask(SVEStoreVectorRegisterMask)) { + case STR_z_bi: + mnemonic = "str"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; + } else { + form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVETableLookup(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVETableLookup)"; + + switch (instr->Mask(SVETableLookupMask)) { + case TBL_z_zz_1: + mnemonic = "tbl"; + form = "'Zd.'t, {'Zn.'t}, 'Zm.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEUnpackPredicateElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.h, 'Pn.b"; + + switch 
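+ // punpkhi/punpklo always widen a predicate from B to H elements, which is
+ // why the form above is fixed as "'Pd.h, 'Pn.b".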
(instr->Mask(SVEUnpackPredicateElementsMask)) { + case PUNPKHI_p_p: + mnemonic = "punpkhi"; + break; + case PUNPKLO_p_p: + mnemonic = "punpklo"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEUnpackVectorElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'th"; + + if (instr->GetSVESize() == 0) { + // The lowest lane size of the destination vector is H-sized lane. + Format(instr, "unallocated", "(SVEUnpackVectorElements)"); + return; + } + + switch (instr->Mask(SVEUnpackVectorElementsMask)) { + case SUNPKHI_z_z: + mnemonic = "sunpkhi"; + break; + case SUNPKLO_z_z: + mnemonic = "sunpklo"; + break; + case UUNPKHI_z_z: + mnemonic = "uunpkhi"; + break; + case UUNPKLO_z_z: + mnemonic = "uunpklo"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEVectorSplice_Destructive(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEVectorSplice_Destructive)"; + + switch (instr->Mask(SVEVectorSplice_DestructiveMask)) { + case SPLICE_z_p_zz_des: + mnemonic = "splice"; + form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEAddressGeneration(const Instruction *instr) { + const char *mnemonic = "adr"; + const char *form = "'Zd.d, ['Zn.d, 'Zm.d"; + const char *suffix = NULL; + + bool msz_is_zero = (instr->ExtractBits(11, 10) == 0); + + switch (instr->Mask(SVEAddressGenerationMask)) { + case ADR_z_az_d_s32_scaled: + suffix = msz_is_zero ? ", sxtw]" : ", sxtw #'u1110]"; + break; + case ADR_z_az_d_u32_scaled: + suffix = msz_is_zero ? ", uxtw]" : ", uxtw #'u1110]"; + break; + case ADR_z_az_s_same_scaled: + case ADR_z_az_d_same_scaled: + form = "'Zd.'t, ['Zn.'t, 'Zm.'t"; + suffix = msz_is_zero ? "]" : ", lsl #'u1110]"; + break; + default: + mnemonic = "unimplemented"; + form = "(SVEAddressGeneration)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEBitwiseLogicalUnpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.d, 'Zn.d, 'Zm.d"; + + switch (instr->Mask(SVEBitwiseLogicalUnpredicatedMask)) { + case AND_z_zz: + mnemonic = "and"; + break; + case BIC_z_zz: + mnemonic = "bic"; + break; + case EOR_z_zz: + mnemonic = "eor"; + break; + case ORR_z_zz: + mnemonic = "orr"; + if (instr->GetRn() == instr->GetRm()) { + mnemonic = "mov"; + form = "'Zd.d, 'Zn.d"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBitwiseShiftUnpredicated)"; + unsigned tsize = + (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19); + unsigned lane_size = instr->GetSVESize(); + + switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { + case ASR_z_zi: + if (tsize != 0) { + // The tsz field must not be zero. + mnemonic = "asr"; + form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; + } + break; + case ASR_z_zw: + if (lane_size <= kSRegSizeInBytesLog2) { + mnemonic = "asr"; + form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + } + break; + case LSL_z_zi: + if (tsize != 0) { + // The tsz field must not be zero. 
+ mnemonic = "lsl"; + form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSver"; + } + break; + case LSL_z_zw: + if (lane_size <= kSRegSizeInBytesLog2) { + mnemonic = "lsl"; + form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + } + break; + case LSR_z_zi: + if (tsize != 0) { + // The tsz field must not be zero. + mnemonic = "lsr"; + form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; + } + break; + case LSR_z_zw: + if (lane_size <= kSRegSizeInBytesLog2) { + mnemonic = "lsr"; + form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + } + break; + default: + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEElementCount(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = + IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); + + switch (instr->Mask(SVEElementCountMask)) { + case CNTB_r_s: + mnemonic = "cntb"; + break; + case CNTD_r_s: + mnemonic = "cntd"; + break; + case CNTH_r_s: + mnemonic = "cnth"; + break; + case CNTW_r_s: + mnemonic = "cntw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPAccumulatingReduction(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPAccumulatingReduction)"; + + switch (instr->Mask(SVEFPAccumulatingReductionMask)) { + case FADDA_v_p_z: + mnemonic = "fadda"; + form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPArithmeticUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { + case FADD_z_zz: + mnemonic = "fadd"; + break; + case FMUL_z_zz: + mnemonic = "fmul"; + break; + case FRECPS_z_zz: + mnemonic = "frecps"; + break; + case FRSQRTS_z_zz: + mnemonic = "frsqrts"; + break; + case FSUB_z_zz: + mnemonic = "fsub"; + break; + case FTSMUL_z_zz: + mnemonic = "ftsmul"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPCompareVectors(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEFPCompareVectorsMask)) { + case FACGE_p_p_zz: + mnemonic = "facge"; + break; + case FACGT_p_p_zz: + mnemonic = "facgt"; + break; + case FCMEQ_p_p_zz: + mnemonic = "fcmeq"; + break; + case FCMGE_p_p_zz: + mnemonic = "fcmge"; + break; + case FCMGT_p_p_zz: + mnemonic = "fcmgt"; + break; + case FCMNE_p_p_zz: + mnemonic = "fcmne"; + break; + case FCMUO_p_p_zz: + mnemonic = "fcmuo"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPCompareWithZero(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0"; + + switch (instr->Mask(SVEFPCompareWithZeroMask)) { + case FCMEQ_p_p_z0: + mnemonic = "fcmeq"; + break; + case FCMGE_p_p_z0: + mnemonic = "fcmge"; + break; + case FCMGT_p_p_z0: + mnemonic = "fcmgt"; + break; + case FCMLE_p_p_z0: + mnemonic = "fcmle"; + break; + case FCMLT_p_p_z0: + mnemonic = "fcmlt"; + break; + case FCMNE_p_p_z0: + mnemonic = "fcmne"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPComplexAddition(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPComplexAddition)"; + + switch (instr->Mask(SVEFPComplexAdditionMask)) { + case FCADD_z_p_zz: + mnemonic = "fcadd"; + if 
(instr->ExtractBit(16) == 0) { + form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #90"; + } else { + form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #270"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPComplexMulAdd(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPComplexMulAdd)"; + const char *suffix = NULL; + + const char *fcmla_constants[] = {"0", "90", "180", "270"}; + + switch (instr->Mask(SVEFPComplexMulAddMask)) { + case FCMLA_z_p_zzz: + mnemonic = "fcmla"; + form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #"; + suffix = fcmla_constants[instr->ExtractBits(14, 13)]; + break; + default: + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEFPComplexMulAddIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPComplexMulAddIndex)"; + + const char *fcmla_constants[] = {"0", "90", "180", "270"}; + const char *suffix = fcmla_constants[instr->ExtractBits(11, 10)]; + + switch (instr->Mask(SVEFPComplexMulAddIndexMask)) { + case FCMLA_z_zzzi_h: + mnemonic = "fcmla"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #"; + break; + case FCMLA_z_zzzi_s: + mnemonic = "fcmla"; + form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #"; + break; + default: + suffix = NULL; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEFPFastReduction(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'t'u0400, 'Pgl, 'Zn.'t"; + + switch (instr->Mask(SVEFPFastReductionMask)) { + case FADDV_v_p_z: + mnemonic = "faddv"; + break; + case FMAXNMV_v_p_z: + mnemonic = "fmaxnmv"; + break; + case FMAXV_v_p_z: + mnemonic = "fmaxv"; + break; + case FMINNMV_v_p_z: + mnemonic = "fminnmv"; + break; + case FMINV_v_p_z: + mnemonic = "fminv"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPMulIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPMulIndex)"; + + switch (instr->Mask(SVEFPMulIndexMask)) { + case FMUL_z_zzi_d: + mnemonic = "fmul"; + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + case FMUL_z_zzi_h: + case FMUL_z_zzi_h_i3h: + mnemonic = "fmul"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + break; + case FMUL_z_zzi_s: + mnemonic = "fmul"; + form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPMulAdd(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEFPMulAddMask)) { + case FMAD_z_p_zzz: + mnemonic = "fmad"; + break; + case FMLA_z_p_zzz: + mnemonic = "fmla"; + break; + case FMLS_z_p_zzz: + mnemonic = "fmls"; + break; + case FMSB_z_p_zzz: + mnemonic = "fmsb"; + break; + case FNMAD_z_p_zzz: + mnemonic = "fnmad"; + break; + case FNMLA_z_p_zzz: + mnemonic = "fnmla"; + break; + case FNMLS_z_p_zzz: + mnemonic = "fnmls"; + break; + case FNMSB_z_p_zzz: + mnemonic = "fnmsb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPMulAddIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEFPMulAddIndex)"; + + switch (instr->Mask(SVEFPMulAddIndexMask)) { + case FMLA_z_zzzi_d: + mnemonic = "fmla"; + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + case FMLA_z_zzzi_s: + mnemonic = "fmla"; + form = "'Zd.s, 'Zn.s, 
z'u1816.s['u2019]"; + break; + case FMLS_z_zzzi_d: + mnemonic = "fmls"; + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + case FMLS_z_zzzi_s: + mnemonic = "fmls"; + form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + break; + case FMLA_z_zzzi_h: + case FMLA_z_zzzi_h_i3h: + mnemonic = "fmla"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + break; + case FMLS_z_zzzi_h: + case FMLS_z_zzzi_h_i3h: + mnemonic = "fmls"; + form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + break; + default: + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEFPUnaryOpUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t"; + + switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { + case FRECPE_z_z: + mnemonic = "frecpe"; + break; + case FRSQRTE_z_z: + mnemonic = "frsqrte"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIncDecByPredicateCount(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIncDecByPredicateCount)"; + + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + case DECP_r_p_r: + case DECP_z_p_z: + mnemonic = "decp"; + break; + case INCP_r_p_r: + case INCP_z_p_z: + mnemonic = "incp"; + break; + case SQDECP_r_p_r_sx: + case SQDECP_r_p_r_x: + case SQDECP_z_p_z: + mnemonic = "sqdecp"; + break; + case SQINCP_r_p_r_sx: + case SQINCP_r_p_r_x: + case SQINCP_z_p_z: + mnemonic = "sqincp"; + break; + case UQDECP_r_p_r_uw: + case UQDECP_r_p_r_x: + case UQDECP_z_p_z: + mnemonic = "uqdecp"; + break; + case UQINCP_r_p_r_uw: + case UQINCP_r_p_r_x: + case UQINCP_z_p_z: + mnemonic = "uqincp"; + break; + default: + break; + } + + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + // <Xdn>, <Pg>.<T> + case DECP_r_p_r: + case INCP_r_p_r: + form = "'Xd, 'Pn.'t"; + break; + // <Zdn>.<T>, <Pg> + case DECP_z_p_z: + case INCP_z_p_z: + case SQDECP_z_p_z: + case SQINCP_z_p_z: + case UQDECP_z_p_z: + case UQINCP_z_p_z: + form = "'Zd.'t, 'Pn"; + break; + // <Xdn>, <Pg>.<T>, <Wdn> + case SQDECP_r_p_r_sx: + case SQINCP_r_p_r_sx: + form = "'Xd, 'Pn.'t, 'Wd"; + break; + // <Xdn>, <Pg>.<T> + case SQDECP_r_p_r_x: + case SQINCP_r_p_r_x: + case UQDECP_r_p_r_x: + case UQINCP_r_p_r_x: + form = "'Xd, 'Pn.'t"; + break; + // <Wdn>, <Pg>.<T> + case UQDECP_r_p_r_uw: + case UQINCP_r_p_r_uw: + form = "'Wd, 'Pn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIndexGeneration(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIndexGeneration)"; + + bool w_inputs = + static_cast<unsigned>(instr->GetSVESize()) <= kWRegSizeInBytesLog2; + + switch (instr->Mask(SVEIndexGenerationMask)) { + case INDEX_z_ii: + mnemonic = "index"; + form = "'Zd.'t, #'s0905, #'s2016"; + break; + case INDEX_z_ir: + mnemonic = "index"; + form = w_inputs ? "'Zd.'t, #'s0905, 'Wm" : "'Zd.'t, #'s0905, 'Xm"; + break; + case INDEX_z_ri: + mnemonic = "index"; + form = w_inputs ? "'Zd.'t, 'Wn, #'s2016" : "'Zd.'t, 'Xn, #'s2016"; + break; + case INDEX_z_rr: + mnemonic = "index"; + form = w_inputs ? 
"'Zd.'t, 'Wn, 'Wm" : "'Zd.'t, 'Xn, 'Xm"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntArithmeticUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) { + case ADD_z_zz: + mnemonic = "add"; + break; + case SQADD_z_zz: + mnemonic = "sqadd"; + break; + case SQSUB_z_zz: + mnemonic = "sqsub"; + break; + case SUB_z_zz: + mnemonic = "sub"; + break; + case UQADD_z_zz: + mnemonic = "uqadd"; + break; + case UQSUB_z_zz: + mnemonic = "uqsub"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareSignedImm(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016"; + + switch (instr->Mask(SVEIntCompareSignedImmMask)) { + case CMPEQ_p_p_zi: + mnemonic = "cmpeq"; + break; + case CMPGE_p_p_zi: + mnemonic = "cmpge"; + break; + case CMPGT_p_p_zi: + mnemonic = "cmpgt"; + break; + case CMPLE_p_p_zi: + mnemonic = "cmple"; + break; + case CMPLT_p_p_zi: + mnemonic = "cmplt"; + break; + case CMPNE_p_p_zi: + mnemonic = "cmpne"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareUnsignedImm(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014"; + + switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { + case CMPHI_p_p_zi: + mnemonic = "cmphi"; + break; + case CMPHS_p_p_zi: + mnemonic = "cmphs"; + break; + case CMPLO_p_p_zi: + mnemonic = "cmplo"; + break; + case CMPLS_p_p_zi: + mnemonic = "cmpls"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntCompareVectors(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.d"; + + switch (instr->Mask(SVEIntCompareVectorsMask)) { + case CMPEQ_p_p_zw: + mnemonic = "cmpeq"; + break; + case CMPEQ_p_p_zz: + mnemonic = "cmpeq"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPGE_p_p_zw: + mnemonic = "cmpge"; + break; + case CMPGE_p_p_zz: + mnemonic = "cmpge"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPGT_p_p_zw: + mnemonic = "cmpgt"; + break; + case CMPGT_p_p_zz: + mnemonic = "cmpgt"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPHI_p_p_zw: + mnemonic = "cmphi"; + break; + case CMPHI_p_p_zz: + mnemonic = "cmphi"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPHS_p_p_zw: + mnemonic = "cmphs"; + break; + case CMPHS_p_p_zz: + mnemonic = "cmphs"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + case CMPLE_p_p_zw: + mnemonic = "cmple"; + break; + case CMPLO_p_p_zw: + mnemonic = "cmplo"; + break; + case CMPLS_p_p_zw: + mnemonic = "cmpls"; + break; + case CMPLT_p_p_zw: + mnemonic = "cmplt"; + break; + case CMPNE_p_p_zw: + mnemonic = "cmpne"; + break; + case CMPNE_p_p_zz: + mnemonic = "cmpne"; + form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulAddPredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntMulAddPredicated)"; + + switch (instr->Mask(SVEIntMulAddPredicatedMask)) { + case MAD_z_p_zzz: + mnemonic = "mad"; + form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; + break; + case MLA_z_p_zzz: + mnemonic = "mla"; + form = "'Zd.'t, 
'Pgl/m, 'Zn.'t, 'Zm.'t"; + break; + case MLS_z_p_zzz: + mnemonic = "mls"; + form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"; + break; + case MSB_z_p_zzz: + mnemonic = "msb"; + form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntMulAddUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEIntMulAddUnpredicated)"; + + if (static_cast<unsigned>(instr->GetSVESize()) >= kSRegSizeInBytesLog2) { + form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq"; + switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { + case SDOT_z_zzz: + mnemonic = "sdot"; + break; + case UDOT_z_zzz: + mnemonic = "udot"; + break; + default: + break; + } + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEMovprfx(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEMovprfx)"; + + if (instr->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z) { + mnemonic = "movprfx"; + form = "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t"; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntReduction(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Vdv, 'Pgl, 'Zn.'t"; + + if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) { + switch (instr->Mask(SVEIntReductionLogicalMask)) { + case ANDV_r_p_z: + mnemonic = "andv"; + break; + case EORV_r_p_z: + mnemonic = "eorv"; + break; + case ORV_r_p_z: + mnemonic = "orv"; + break; + default: + break; + } + } else { + switch (instr->Mask(SVEIntReductionMask)) { + case SADDV_r_p_z: + mnemonic = "saddv"; + form = "'Dd, 'Pgl, 'Zn.'t"; + break; + case SMAXV_r_p_z: + mnemonic = "smaxv"; + break; + case SMINV_r_p_z: + mnemonic = "sminv"; + break; + case UADDV_r_p_z: + mnemonic = "uaddv"; + form = "'Dd, 'Pgl, 'Zn.'t"; + break; + case UMAXV_r_p_z: + mnemonic = "umaxv"; + break; + case UMINV_r_p_z: + mnemonic = "uminv"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEIntUnaryArithmeticPredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) { + case ABS_z_p_z: + mnemonic = "abs"; + break; + case CLS_z_p_z: + mnemonic = "cls"; + break; + case CLZ_z_p_z: + mnemonic = "clz"; + break; + case CNOT_z_p_z: + mnemonic = "cnot"; + break; + case CNT_z_p_z: + mnemonic = "cnt"; + break; + case FABS_z_p_z: + mnemonic = "fabs"; + break; + case FNEG_z_p_z: + mnemonic = "fneg"; + break; + case NEG_z_p_z: + mnemonic = "neg"; + break; + case NOT_z_p_z: + mnemonic = "not"; + break; + case SXTB_z_p_z: + mnemonic = "sxtb"; + break; + case SXTH_z_p_z: + mnemonic = "sxth"; + break; + case SXTW_z_p_z: + mnemonic = "sxtw"; + break; + case UXTB_z_p_z: + mnemonic = "uxtb"; + break; + case UXTH_z_p_z: + mnemonic = "uxth"; + break; + case UXTW_z_p_z: + mnemonic = "uxtw"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEMulIndex(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEMulIndex)"; + + switch (instr->Mask(SVEMulIndexMask)) { + case SDOT_z_zzzi_d: + mnemonic = "sdot"; + form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; + break; + case SDOT_z_zzzi_s: + mnemonic = "sdot"; + form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; + break; + case UDOT_z_zzzi_d: + mnemonic = "udot"; + form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; + break; + case 
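+ // The indexed dot products use quarter-width sources: a .s accumulator pairs
+ // with .b inputs and a .d accumulator with .h inputs, e.g.
+ // "sdot z0.s, z1.b, z2.b[3]".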
UDOT_z_zzzi_s: + mnemonic = "udot"; + form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPermuteVectorExtract(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPermuteVectorExtract)"; + + switch (instr->Mask(SVEPermuteVectorExtractMask)) { + case EXT_z_zi_des: + mnemonic = "ext"; + form = "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPermuteVectorInterleaving(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + + switch (instr->Mask(SVEPermuteVectorInterleavingMask)) { + case TRN1_z_zz: + mnemonic = "trn1"; + break; + case TRN2_z_zz: + mnemonic = "trn2"; + break; + case UZP1_z_zz: + mnemonic = "uzp1"; + break; + case UZP2_z_zz: + mnemonic = "uzp2"; + break; + case ZIP1_z_zz: + mnemonic = "zip1"; + break; + case ZIP2_z_zz: + mnemonic = "zip2"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateCount(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateCount)"; + + switch (instr->Mask(SVEPredicateCountMask)) { + case CNTP_r_p_p: + mnemonic = "cntp"; + form = "'Xd, p'u1310, 'Pn.'t"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; + + int pd = instr->GetPd(); + int pn = instr->GetPn(); + int pm = instr->GetPm(); + int pg = instr->ExtractBits(13, 10); + + switch (instr->Mask(SVEPredicateLogicalMask)) { + case ANDS_p_p_pp_z: + mnemonic = "ands"; + if (pn == pm) { + mnemonic = "movs"; + form = "'Pd.b, p'u1310/z, 'Pn.b"; + } + break; + case AND_p_p_pp_z: + mnemonic = "and"; + if (pn == pm) { + mnemonic = "mov"; + form = "'Pd.b, p'u1310/z, 'Pn.b"; + } + break; + case BICS_p_p_pp_z: + mnemonic = "bics"; + break; + case BIC_p_p_pp_z: + mnemonic = "bic"; + break; + case EORS_p_p_pp_z: + mnemonic = "eors"; + if (pm == pg) { + mnemonic = "nots"; + form = "'Pd.b, 'Pm/z, 'Pn.b"; + } + break; + case EOR_p_p_pp_z: + mnemonic = "eor"; + if (pm == pg) { + mnemonic = "not"; + form = "'Pd.b, 'Pm/z, 'Pn.b"; + } + break; + case NANDS_p_p_pp_z: + mnemonic = "nands"; + break; + case NAND_p_p_pp_z: + mnemonic = "nand"; + break; + case NORS_p_p_pp_z: + mnemonic = "nors"; + break; + case NOR_p_p_pp_z: + mnemonic = "nor"; + break; + case ORNS_p_p_pp_z: + mnemonic = "orns"; + break; + case ORN_p_p_pp_z: + mnemonic = "orn"; + break; + case ORRS_p_p_pp_z: + mnemonic = "orrs"; + if ((pn == pm) && (pn == pg)) { + mnemonic = "movs"; + form = "'Pd.b, 'Pn.b"; + } + break; + case ORR_p_p_pp_z: + mnemonic = "orr"; + if ((pn == pm) && (pn == pg)) { + mnemonic = "mov"; + form = "'Pd.b, 'Pn.b"; + } + break; + case SEL_p_p_pp: + if (pd == pm) { + mnemonic = "mov"; + form = "'Pd.b, p'u1310/m, 'Pn.b"; + } else { + mnemonic = "sel"; + form = "'Pd.b, p'u1310, 'Pn.b, 'Pm.b"; + } + break; + default: + form = "(SVEPredicateLogical)"; + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateInitialize(const Instruction *instr) { + // This group only contains PTRUE{S}, and there are no unallocated encodings. 
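+ // PTRUE{S} defaults to the ALL pattern, so "ptrue p0.b" is preferred over
+ // "ptrue p0.b, all" (see the SVE_ALL check below).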
+ VIXL_STATIC_ASSERT( + SVEPredicateInitializeMask == + (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit)); + VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) || + (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s)); + + const char *mnemonic = instr->ExtractBit(16) ? "ptrues" : "ptrue"; + const char *form = "'Pd.'t, 'Ipc"; + // Omit the pattern if it is the default ('ALL'). + if (instr->ExtractBits(9, 5) == SVE_ALL) form = "'Pd.'t"; + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateNextActive(const Instruction *instr) { + // This group only contains PNEXT, and there are no unallocated encodings. + VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask); + VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p); + + Format(instr, "pnext", "'Pd.'t, 'Pn, 'Pd.'t"); +} + +void Disassembler::VisitSVEPredicateReadFromFFR_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEPredicateReadFromFFR_Predicated)"; + switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) { + case RDFFR_p_p_f: + case RDFFRS_p_p_f: + mnemonic = instr->ExtractBit(22) ? "rdffrs" : "rdffr"; + form = "'Pd.b, 'Pn/z"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPropagateBreak(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; + + switch (instr->Mask(SVEPropagateBreakMask)) { + case BRKPAS_p_p_pp: + mnemonic = "brkpas"; + break; + case BRKPA_p_p_pp: + mnemonic = "brkpa"; + break; + case BRKPBS_p_p_pp: + mnemonic = "brkpbs"; + break; + case BRKPB_p_p_pp: + mnemonic = "brkpb"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStackFrameAdjustment(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Xds, 'Xms, #'s1005"; + + switch (instr->Mask(SVEStackFrameAdjustmentMask)) { + case ADDPL_r_ri: + mnemonic = "addpl"; + break; + case ADDVL_r_ri: + mnemonic = "addvl"; + break; + default: + form = "(SVEStackFrameAdjustment)"; + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEStackFrameSize(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEStackFrameSize)"; + + switch (instr->Mask(SVEStackFrameSizeMask)) { + case RDVL_r_i: + mnemonic = "rdvl"; + form = "'Xd, #'s1005"; + break; + default: + break; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEVectorSelect(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEVectorSelect)"; + + switch (instr->Mask(SVEVectorSelectMask)) { + case SEL_z_p_zz: + if (instr->GetRd() == instr->GetRm()) { + mnemonic = "mov"; + form = "'Zd.'t, p'u1310/m, 'Zn.'t"; + } else { + mnemonic = "sel"; + form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousLoad_ScalarPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? 
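+ // Contiguous loads scale the signed four-bit offset (bits 19:16) by the
+ // whole vector length, e.g. "ld1w {z0.s}, p0/z, [x1, #-8, mul vl]".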
"]" : ", #'s1916, mul vl]"; + + switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) { + case LD1B_z_p_bi_u16: + case LD1B_z_p_bi_u32: + case LD1B_z_p_bi_u64: + case LD1B_z_p_bi_u8: + mnemonic = "ld1b"; + break; + case LD1D_z_p_bi_u64: + mnemonic = "ld1d"; + break; + case LD1H_z_p_bi_u16: + case LD1H_z_p_bi_u32: + case LD1H_z_p_bi_u64: + mnemonic = "ld1h"; + break; + case LD1SB_z_p_bi_s16: + case LD1SB_z_p_bi_s32: + case LD1SB_z_p_bi_s64: + mnemonic = "ld1sb"; + break; + case LD1SH_z_p_bi_s32: + case LD1SH_z_p_bi_s64: + mnemonic = "ld1sh"; + break; + case LD1SW_z_p_bi_s64: + mnemonic = "ld1sw"; + break; + case LD1W_z_p_bi_u32: + case LD1W_z_p_bi_u64: + mnemonic = "ld1w"; + break; + default: + form = "(SVEContiguousLoad_ScalarPlusImm)"; + suffix = NULL; + break; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEContiguousLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns, 'Xm"; + const char *suffix = NULL; + + switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { + case LD1B_z_p_br_u16: + case LD1B_z_p_br_u32: + case LD1B_z_p_br_u64: + case LD1B_z_p_br_u8: + mnemonic = "ld1b"; + suffix = "]"; + break; + case LD1D_z_p_br_u64: + mnemonic = "ld1d"; + suffix = ", lsl #'u2423]"; + break; + case LD1H_z_p_br_u16: + case LD1H_z_p_br_u32: + case LD1H_z_p_br_u64: + mnemonic = "ld1h"; + suffix = ", lsl #'u2423]"; + break; + case LD1SB_z_p_br_s16: + case LD1SB_z_p_br_s32: + case LD1SB_z_p_br_s64: + mnemonic = "ld1sb"; + suffix = "]"; + break; + case LD1SH_z_p_br_s32: + case LD1SH_z_p_br_s64: + mnemonic = "ld1sh"; + suffix = ", lsl #1]"; + break; + case LD1SW_z_p_br_s64: + mnemonic = "ld1sw"; + suffix = ", lsl #2]"; + break; + case LD1W_z_p_br_u32: + case LD1W_z_p_br_u64: + mnemonic = "ld1w"; + suffix = ", lsl #'u2423]"; + break; + default: + form = "(SVEContiguousLoad_ScalarPlusScalar)"; + suffix = NULL; + break; + } + + Format(instr, mnemonic, form, suffix); +} void Disassembler::VisitReserved(const Instruction *instr) { // UDF is the only instruction in this group, and the Decoder is precise. @@ -5059,14 +9633,18 @@ int64_t Disassembler::CodeRelativeAddress(const void *addr) { void Disassembler::Format(const Instruction *instr, const char *mnemonic, - const char *format) { + const char *format0, + const char *format1) { VIXL_ASSERT(mnemonic != NULL); ResetOutput(); Substitute(instr, mnemonic); - if (format != NULL) { + if (format0 != NULL) { VIXL_ASSERT(buffer_pos_ < buffer_size_); buffer_[buffer_pos_++] = ' '; - Substitute(instr, format); + Substitute(instr, format0); + if (format1 != NULL) { + Substitute(instr, format1); + } } VIXL_ASSERT(buffer_pos_ < buffer_size_); buffer_[buffer_pos_] = 0; @@ -5091,10 +9669,11 @@ void Disassembler::Substitute(const Instruction *instr, const char *string) { int Disassembler::SubstituteField(const Instruction *instr, const char *format) { switch (format[0]) { - // NB. The remaining substitution prefix characters are: GJKUZ. - case 'R': // Register. X or W, selected by sf bit. + // NB. The remaining substitution prefix upper-case characters are: JU. + case 'R': // Register. X or W, selected by sf (or alternative) bit. case 'F': // FP register. S or D, selected by type field. case 'V': // Vector register, V, vector format. + case 'Z': // Scalable vector register. 
case 'W': case 'X': case 'B': @@ -5103,14 +9682,14 @@ int Disassembler::SubstituteField(const Instruction *instr, case 'D': case 'Q': return SubstituteRegisterField(instr, format); + case 'P': + return SubstitutePredicateRegisterField(instr, format); case 'I': return SubstituteImmediateField(instr, format); case 'L': return SubstituteLiteralField(instr, format); case 'N': return SubstituteShiftField(instr, format); - case 'P': - return SubstitutePrefetchField(instr, format); case 'C': return SubstituteConditionField(instr, format); case 'E': @@ -5127,6 +9706,15 @@ int Disassembler::SubstituteField(const Instruction *instr, return SubstituteCrField(instr, format); case 'G': return SubstituteSysOpField(instr, format); + case 'p': + return SubstitutePrefetchField(instr, format); + case 'u': + case 's': + return SubstituteIntField(instr, format); + case 't': + return SubstituteSVESize(instr, format); + case '?': + return SubstituteTernary(instr, format); default: { VIXL_UNREACHABLE(); return 1; @@ -5134,55 +9722,20 @@ int Disassembler::SubstituteField(const Instruction *instr, } } +std::pair<unsigned, unsigned> Disassembler::GetRegNumForField( + const Instruction *instr, char reg_prefix, const char *field) { + unsigned reg_num = UINT_MAX; + unsigned field_len = 1; -int Disassembler::SubstituteRegisterField(const Instruction *instr, - const char *format) { - char reg_prefix = format[0]; - unsigned reg_num = 0; - unsigned field_len = 2; - - switch (format[1]) { + switch (field[0]) { case 'd': reg_num = instr->GetRd(); - if (format[2] == 'q') { - reg_prefix = instr->GetNEONQ() ? 'X' : 'W'; - field_len = 3; - } break; case 'n': reg_num = instr->GetRn(); break; case 'm': reg_num = instr->GetRm(); - switch (format[2]) { - // Handle registers tagged with b (bytes), z (instruction), or - // r (registers), used for address updates in - // NEON load/store instructions. - case 'r': - case 'b': - case 'z': { - field_len = 3; - char *eimm; - int imm = static_cast<int>(strtol(&format[3], &eimm, 10)); - field_len += eimm - &format[3]; - if (reg_num == 31) { - switch (format[2]) { - case 'z': - imm *= (1 << instr->GetNEONLSSize()); - break; - case 'r': - imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes - : kQRegSizeInBytes; - break; - case 'b': - break; - } - AppendToOutput("#%d", imm); - return field_len; - } - break; - } - } break; case 'e': // This is register Rm, but using a 4-bit specifier. Used in NEON @@ -5197,72 +9750,121 @@ int Disassembler::SubstituteRegisterField(const Instruction *instr, break; case 't': reg_num = instr->GetRt(); - if (format[0] == 'V') { - if ((format[2] >= '2') && (format[2] <= '4')) { - // Handle consecutive vector register specifiers Vt2, Vt3 and Vt4. - reg_num = (reg_num + format[2] - '1') % 32; - field_len = 3; - } - } else { - if (format[2] == '2') { - // Handle register specifier Rt2. 
-          reg_num = instr->GetRt2();
-          field_len = 3;
-        }
-      }
       break;
-    case '(': {
-      switch (format[2]) {
-        case 's':
-          reg_num = instr->GetRs();
-          break;
-        case 't':
-          reg_num = instr->GetRt();
-          break;
-        default:
-          VIXL_UNREACHABLE();
-      }
+    default:
+      VIXL_UNREACHABLE();
+  }
-      VIXL_ASSERT(format[3] == '+');
-      int i = 4;
-      int addition = 0;
-      while (format[i] != ')') {
-        VIXL_ASSERT((format[i] >= '0') && (format[i] <= '9'));
-        addition *= 10;
-        addition += format[i] - '0';
-        ++i;
+  switch (field[1]) {
+    case '2':
+    case '3':
+    case '4':
+      if ((reg_prefix == 'V') || (reg_prefix == 'Z')) {  // Vt2/3/4, Zt2/3/4
+        VIXL_ASSERT(field[0] == 't');
+        reg_num = (reg_num + field[1] - '1') % 32;
+        field_len++;
+      } else {
+        VIXL_ASSERT((field[0] == 't') && (field[1] == '2'));
+        reg_num = instr->GetRt2();
+        field_len++;
       }
-      reg_num += addition;
-      field_len = i + 1;
       break;
+    case '+':  // Rt+, Rs+ (ie. Rt + 1, Rs + 1)
+      VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X'));
+      VIXL_ASSERT((field[0] == 's') || (field[0] == 't'));
+      reg_num++;
+      field_len++;
+      break;
+    case 's':  // Core registers that are (w)sp rather than zr.
+      VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X'));
+      reg_num = (reg_num == kZeroRegCode) ? kSPRegInternalCode : reg_num;
+      field_len++;
+      break;
+  }
+
+  VIXL_ASSERT(reg_num != UINT_MAX);
+  return std::make_pair(reg_num, field_len);
+}
+
+int Disassembler::SubstituteRegisterField(const Instruction *instr,
+                                          const char *format) {
+  unsigned field_len = 1;  // Initially, count only the first character.
+
+  // The first character of the register format field, eg R, X, S, etc.
+  char reg_prefix = format[0];
+
+  // Pointer to the character after the prefix. This may be one of the standard
+  // symbols representing a register encoding, or a two digit bit position,
+  // handled by the following code.
+  const char *reg_field = &format[1];
+
+  if (reg_prefix == 'R') {
+    bool is_x = instr->GetSixtyFourBits();
+    if (strspn(reg_field, "0123456789") == 2) {  // r20d, r31n, etc.
+      // Core W or X registers where the type is determined by a specified bit
+      // position, eg. 'R20d, 'R05n. This is like the 'Rd syntax, where bit 31
+      // is implicitly used to select between W and X.
+      int bitpos = ((reg_field[0] - '0') * 10) + (reg_field[1] - '0');
+      VIXL_ASSERT(bitpos <= 31);
+      is_x = (instr->ExtractBit(bitpos) == 1);
+      reg_field = &format[3];
+      field_len += 2;
     }
-    default:
-      VIXL_UNREACHABLE();
+    reg_prefix = is_x ? 'X' : 'W';
   }
-  // Increase field length for registers tagged as stack.
-  if (format[1] != '(' && format[2] == 's') {
-    field_len = 3;
+  std::pair<unsigned, unsigned> rn =
+      GetRegNumForField(instr, reg_prefix, reg_field);
+  unsigned reg_num = rn.first;
+  field_len += rn.second;
+
+  if (reg_field[0] == 'm') {
+    switch (reg_field[1]) {
+      // Handle registers tagged with b (bytes), z (instruction), or
+      // r (registers), used for address updates in NEON load/store
+      // instructions.
+      case 'r':
+      case 'b':
+      case 'z': {
+        VIXL_ASSERT(reg_prefix == 'X');
+        field_len = 3;
+        char *eimm;
+        int imm = static_cast<int>(strtol(&reg_field[2], &eimm, 10));
+        field_len += eimm - &reg_field[2];
+        if (reg_num == 31) {
+          switch (reg_field[1]) {
+            case 'z':
+              imm *= (1 << instr->GetNEONLSSize());
+              break;
+            case 'r':
+              imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes
+                                              : kQRegSizeInBytes;
+              break;
+            case 'b':
+              break;
+          }
+          AppendToOutput("#%d", imm);
+          return field_len;
+        }
+        break;
+      }
+    }
+  }
 
   CPURegister::RegisterType reg_type = CPURegister::kRegister;
   unsigned reg_size = kXRegSize;
 
-  switch (reg_prefix) {
-    case 'R':
-      reg_prefix = instr->GetSixtyFourBits() ? 'X' : 'W';
-      break;
-    case 'F':
-      switch (instr->GetFPType()) {
-        case 3:
-          reg_prefix = 'H';
-          break;
-        case 0:
-          reg_prefix = 'S';
-          break;
-        default:
-          reg_prefix = 'D';
-      }
+  if (reg_prefix == 'F') {
+    switch (instr->GetFPType()) {
+      case 3:
+        reg_prefix = 'H';
+        break;
+      case 0:
+        reg_prefix = 'S';
+        break;
+      default:
+        reg_prefix = 'D';
+    }
   }
 
   switch (reg_prefix) {
@@ -5295,22 +9897,51 @@ int Disassembler::SubstituteRegisterField(const Instruction *instr,
       reg_size = kQRegSize;
       break;
     case 'V':
+      if (reg_field[1] == 'v') {
+        reg_type = CPURegister::kVRegister;
+        reg_size = 1 << (instr->GetSVESize() + 3);
+        field_len++;
+        break;
+      }
       AppendToOutput("v%d", reg_num);
       return field_len;
+    case 'Z':
+      AppendToOutput("z%d", reg_num);
+      return field_len;
     default:
       VIXL_UNREACHABLE();
   }
 
-  if ((reg_type == CPURegister::kRegister) && (reg_num == kZeroRegCode) &&
-      (format[2] == 's')) {
-    reg_num = kSPRegInternalCode;
-  }
-
   AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type));
 
   return field_len;
 }
 
+int Disassembler::SubstitutePredicateRegisterField(const Instruction *instr,
+                                                   const char *format) {
+  VIXL_ASSERT(format[0] == 'P');
+  switch (format[1]) {
+    // This field only supports P registers that are always encoded in the
+    // same position.
+    case 'd':
+    case 't':
+      AppendToOutput("p%u", instr->GetPt());
+      break;
+    case 'n':
+      AppendToOutput("p%u", instr->GetPn());
+      break;
+    case 'm':
+      AppendToOutput("p%u", instr->GetPm());
+      break;
+    case 'g':
+      VIXL_ASSERT(format[2] == 'l');
+      AppendToOutput("p%u", instr->GetPgLow8());
+      return 3;
+    default:
+      VIXL_UNREACHABLE();
+  }
+  return 2;
+}
 
 int Disassembler::SubstituteImmediateField(const Instruction *instr,
                                            const char *format) {
@@ -5391,36 +10022,92 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
       return 6;
     }
     case 'A': {  // IAddSub.
-      VIXL_ASSERT(instr->GetShiftAddSub() <= 1);
-      int64_t imm = instr->GetImmAddSub() << (12 * instr->GetShiftAddSub());
+      int64_t imm = instr->GetImmAddSub() << (12 * instr->GetImmAddSubShift());
       AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm);
       return 7;
     }
-    case 'F': {  // IFPHalf, IFPSingle, IFPDouble, or IFPFBits.
-      if (format[3] == 'F') {  // IFPFbits.
-        AppendToOutput("#%" PRId32, 64 - instr->GetFPScale());
-        return 8;
-      } else {
-        AppendToOutput("#0x%" PRIx32 " (%.4f)",
-                       instr->GetImmFP(),
-                       format[3] == 'H'
-                           ? FPToFloat(instr->GetImmFP16(), kIgnoreDefaultNaN)
-                           : (format[3] == 'S') ? instr->GetImmFP32()
-                                                : instr->GetImmFP64());
-        if (format[3] == 'H') {
-          return 7;
-        } else {
-          return 9;
-        }
+    case 'F': {  // IFP, IFPNeon, IFPSve or IFPFBits.
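+      // All of these variants print an 8-bit encoded FP immediate; they
+      // differ only in which instruction field holds it (the NEON abcdefgh
+      // field for IFPNeon, bits <12:5> for IFPSve, and the scalar ImmFP
+      // field otherwise). IFPFBits is the exception: it prints the fbits
+      // operand used by fixed-point conversions.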
+ int imm8 = 0; + int len = strlen("IFP"); + switch (format[3]) { + case 'F': + VIXL_ASSERT(strncmp(format, "IFPFBits", strlen("IFPFBits")) == 0); + AppendToOutput("#%" PRId32, 64 - instr->GetFPScale()); + return strlen("IFPFBits"); + case 'N': + VIXL_ASSERT(strncmp(format, "IFPNeon", strlen("IFPNeon")) == 0); + imm8 = instr->GetImmNEONabcdefgh(); + len += strlen("Neon"); + break; + case 'S': + VIXL_ASSERT(strncmp(format, "IFPSve", strlen("IFPSve")) == 0); + imm8 = instr->ExtractBits(12, 5); + len += strlen("Sve"); + break; + default: + VIXL_ASSERT(strncmp(format, "IFP", strlen("IFP")) == 0); + imm8 = instr->GetImmFP(); + break; } + AppendToOutput("#0x%" PRIx32 " (%.4f)", + imm8, + Instruction::Imm8ToFP32(imm8)); + return len; } case 'H': { // IH - ImmHint AppendToOutput("#%" PRId32, instr->GetImmHint()); return 2; } case 'T': { // ITri - Immediate Triangular Encoded. - AppendToOutput("#0x%" PRIx64, instr->GetImmLogical()); - return 4; + if (format[4] == 'S') { + VIXL_ASSERT((format[5] == 'v') && (format[6] == 'e')); + switch (format[7]) { + case 'l': + // SVE logical immediate encoding. + AppendToOutput("#0x%" PRIx64, instr->GetSVEImmLogical()); + return 8; + case 'p': { + // SVE predicated shift immediate encoding, lsl. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ true); + int lane_bits = 8 << shift_and_lane_size.second; + AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); + return 8; + } + case 'q': { + // SVE predicated shift immediate encoding, asr and lsr. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ true); + AppendToOutput("#%" PRId32, shift_and_lane_size.first); + return 8; + } + case 'r': { + // SVE unpredicated shift immediate encoding, lsl. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ false); + int lane_bits = 8 << shift_and_lane_size.second; + AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); + return 8; + } + case 's': { + // SVE unpredicated shift immediate encoding, asr and lsr. + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ false); + AppendToOutput("#%" PRId32, shift_and_lane_size.first); + return 8; + } + default: + VIXL_UNREACHABLE(); + return 0; + } + } else { + AppendToOutput("#0x%" PRIx64, instr->GetImmLogical()); + return 4; + } } case 'N': { // INzcv. int nzcv = (instr->GetNzcv() << Flags_offset); @@ -5442,12 +10129,21 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, AppendToOutput("#%" PRId32, instr->GetImmS()); return 8; } - case 'S': { // IS - Test and branch bit. + case 't': { // It - Test and branch bit. AppendToOutput("#%" PRId32, (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40()); return 2; } + case 'S': { // ISveSvl - SVE 'mul vl' immediate for structured ld/st. + VIXL_ASSERT(strncmp(format, "ISveSvl", 7) == 0); + int imm = instr->ExtractSignedBits(19, 16); + if (imm != 0) { + int reg_count = instr->ExtractBits(22, 21) + 1; + AppendToOutput(", #%d, mul vl", imm * reg_count); + } + return 7; + } case 's': { // Is - Shift (immediate). switch (format[2]) { case '1': { // Is1 - SSHR. 
@@ -5539,6 +10235,13 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } } return 0; + } else if (strncmp(format, + "IVInsSVEIndex", + strlen("IVInsSVEIndex")) == 0) { + std::pair<int, int> index_and_lane_size = + instr->GetSVEPermuteIndexAndLaneSizeLog2(); + AppendToOutput("%d", index_and_lane_size.first); + return strlen("IVInsSVEIndex"); } VIXL_FALLTHROUGH(); } @@ -5547,27 +10250,7 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, return 9; } case 'M': { // Modified Immediate cases. - if (strncmp(format, "IVMIImmFPHalf", strlen("IVMIImmFPHalf")) == 0) { - AppendToOutput("#0x%" PRIx32 " (%.4f)", - instr->GetImmNEONabcdefgh(), - FPToFloat(instr->GetImmNEONFP16(), - kIgnoreDefaultNaN)); - return strlen("IVMIImmFPHalf"); - } else if (strncmp(format, - "IVMIImmFPSingle", - strlen("IVMIImmFPSingle")) == 0) { - AppendToOutput("#0x%" PRIx32 " (%.4f)", - instr->GetImmNEONabcdefgh(), - instr->GetImmNEONFP32()); - return strlen("IVMIImmFPSingle"); - } else if (strncmp(format, - "IVMIImmFPDouble", - strlen("IVMIImmFPDouble")) == 0) { - AppendToOutput("#0x%" PRIx32 " (%.4f)", - instr->GetImmNEONabcdefgh(), - instr->GetImmNEONFP64()); - return strlen("IVMIImmFPDouble"); - } else if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { + if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { uint64_t imm8 = instr->GetImmNEONabcdefgh(); AppendToOutput("#0x%" PRIx64, imm8); return strlen("IVMIImm8"); @@ -5647,6 +10330,48 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr, } } } + case 'p': { // Ipc - SVE predicate constraint specifier. + VIXL_ASSERT(format[2] == 'c'); + unsigned pattern = instr->GetImmSVEPredicateConstraint(); + switch (pattern) { + // VL1-VL8 are encoded directly. + case SVE_VL1: + case SVE_VL2: + case SVE_VL3: + case SVE_VL4: + case SVE_VL5: + case SVE_VL6: + case SVE_VL7: + case SVE_VL8: + AppendToOutput("vl%u", pattern); + break; + // VL16-VL256 are encoded as log2(N) + c. + case SVE_VL16: + case SVE_VL32: + case SVE_VL64: + case SVE_VL128: + case SVE_VL256: + AppendToOutput("vl%u", 16 << (pattern - SVE_VL16)); + break; + // Special cases. + case SVE_POW2: + AppendToOutput("pow2"); + break; + case SVE_MUL4: + AppendToOutput("mul4"); + break; + case SVE_MUL3: + AppendToOutput("mul3"); + break; + case SVE_ALL: + AppendToOutput("all"); + break; + default: + AppendToOutput("#0x%x", pattern); + break; + } + return 3; + } default: { VIXL_UNIMPLEMENTED(); return 0; @@ -5736,11 +10461,11 @@ int Disassembler::SubstituteShiftField(const Instruction *instr, VIXL_ASSERT(instr->GetShiftDP() <= 0x3); switch (format[1]) { - case 'D': { // HDP. + case 'D': { // NDP. VIXL_ASSERT(instr->GetShiftDP() != ROR); VIXL_FALLTHROUGH(); } - case 'L': { // HLo. + case 'L': { // NLo. if (instr->GetImmDPShift() != 0) { const char *shift_type[] = {"lsl", "lsr", "asr", "ror"}; AppendToOutput(", %s #%" PRId32, @@ -5749,6 +10474,14 @@ int Disassembler::SubstituteShiftField(const Instruction *instr, } return 3; } + case 'S': { // NSveS (SVE structured load/store indexing shift). 
+ VIXL_ASSERT(strncmp(format, "NSveS", 5) == 0); + int msz = instr->ExtractBits(24, 23); + if (msz > 0) { + AppendToOutput(", lsl #%d", msz); + } + return 5; + } default: VIXL_UNIMPLEMENTED(); return 0; @@ -5919,30 +10652,43 @@ int Disassembler::SubstituteLSRegOffsetField(const Instruction *instr, int Disassembler::SubstitutePrefetchField(const Instruction *instr, const char *format) { - VIXL_ASSERT(format[0] == 'P'); + VIXL_ASSERT(format[0] == 'p'); USE(format); - static const char *hints[] = {"ld", "li", "st"}; + bool is_sve = + (strncmp(format, "prefSVEOp", strlen("prefSVEOp")) == 0) ? true : false; + int placeholder_length = is_sve ? 9 : 6; static const char *stream_options[] = {"keep", "strm"}; - unsigned hint = instr->GetPrefetchHint(); + auto get_hints = [](bool is_sve) -> std::vector<std::string> { + static const std::vector<std::string> sve_hints = {"ld", "st"}; + static const std::vector<std::string> core_hints = {"ld", "li", "st"}; + return (is_sve) ? sve_hints : core_hints; + }; + + std::vector<std::string> hints = get_hints(is_sve); + unsigned hint = + is_sve ? instr->GetSVEPrefetchHint() : instr->GetPrefetchHint(); unsigned target = instr->GetPrefetchTarget() + 1; unsigned stream = instr->GetPrefetchStream(); - if ((hint >= ArrayLength(hints)) || (target > 3)) { + if ((hint >= hints.size()) || (target > 3)) { // Unallocated prefetch operations. - int prefetch_mode = instr->GetImmPrefetchOperation(); - AppendToOutput("#0b%c%c%c%c%c", - (prefetch_mode & (1 << 4)) ? '1' : '0', - (prefetch_mode & (1 << 3)) ? '1' : '0', - (prefetch_mode & (1 << 2)) ? '1' : '0', - (prefetch_mode & (1 << 1)) ? '1' : '0', - (prefetch_mode & (1 << 0)) ? '1' : '0'); + if (is_sve) { + std::bitset<4> prefetch_mode(instr->GetSVEImmPrefetchOperation()); + AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); + } else { + std::bitset<5> prefetch_mode(instr->GetImmPrefetchOperation()); + AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); + } } else { VIXL_ASSERT(stream < ArrayLength(stream_options)); - AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]); + AppendToOutput("p%sl%d%s", + hints[hint].c_str(), + target, + stream_options[stream]); } - return 6; + return placeholder_length; } int Disassembler::SubstituteBarrierField(const Instruction *instr, @@ -5997,6 +10743,159 @@ int Disassembler::SubstituteCrField(const Instruction *instr, return 2; } +int Disassembler::SubstituteIntField(const Instruction *instr, + const char *format) { + VIXL_ASSERT((format[0] == 'u') || (format[0] == 's')); + + // A generic signed or unsigned int field uses a placeholder of the form + // 'sAABB and 'uAABB respectively where AA and BB are two digit bit positions + // between 00 and 31, and AA >= BB. The placeholder is substituted with the + // decimal integer represented by the bits in the instruction between + // positions AA and BB inclusive. + // + // In addition, split fields can be represented using 'sAABB:CCDD, where CCDD + // become the least-significant bits of the result, and bit AA is the sign bit + // (if 's is used). + int32_t bits = 0; + int width = 0; + const char *c = format; + do { + c++; // Skip the 'u', 's' or ':'. + VIXL_ASSERT(strspn(c, "0123456789") == 4); + int msb = ((c[0] - '0') * 10) + (c[1] - '0'); + int lsb = ((c[2] - '0') * 10) + (c[3] - '0'); + c += 4; // Skip the characters we just read. 
+    int chunk_width = msb - lsb + 1;
+    VIXL_ASSERT((chunk_width > 0) && (chunk_width < 32));
+    bits = (bits << chunk_width) | (instr->ExtractBits(msb, lsb));
+    width += chunk_width;
+  } while (*c == ':');
+  VIXL_ASSERT(IsUintN(width, bits));
+
+  if (format[0] == 's') {
+    bits = ExtractSignedBitfield32(width - 1, 0, bits);
+  }
+
+  if (*c == '+') {
+    // A "+n" trailing the format specifier indicates the extracted value
+    // should be incremented by n. This is for cases where the encoding is
+    // zero-based, but the range of values is not, eg. values [1, 16] encoded
+    // as [0, 15].
+    char *new_c;
+    uint64_t value = strtoul(c + 1, &new_c, 10);
+    c = new_c;
+    VIXL_ASSERT(IsInt32(value));
+    bits += value;
+  } else if (*c == '*') {
+    // Similarly, a "*n" trailing the format specifier indicates the extracted
+    // value should be multiplied by n. This is for cases where the encoded
+    // immediate is scaled, for example by access size.
+    char *new_c;
+    uint64_t value = strtoul(c + 1, &new_c, 10);
+    c = new_c;
+    VIXL_ASSERT(IsInt32(value));
+    bits *= value;
+  }
+
+  AppendToOutput("%d", bits);
+
+  return static_cast<int>(c - format);
+}
+
+int Disassembler::SubstituteSVESize(const Instruction *instr,
+                                    const char *format) {
+  USE(format);
+  VIXL_ASSERT(format[0] == 't');
+
+  static const char sizes[] = {'b', 'h', 's', 'd', 'q'};
+  // TODO: only the most common case for <size> is supported at the moment,
+  // and even then, the RESERVED values are handled as if they're not
+  // reserved.
+  unsigned size_in_bytes_log2 = instr->GetSVESize();
+  int placeholder_length = 1;
+  switch (format[1]) {
+    case 'l':
+      placeholder_length++;
+      if (format[2] == 's') {
+        // 'tls: Loads and stores
+        size_in_bytes_log2 = instr->ExtractBits(22, 21);
+        placeholder_length++;
+        if (format[3] == 's') {
+          // Sign extension load.
+          unsigned msize = instr->ExtractBits(24, 23);
+          if (msize > size_in_bytes_log2) size_in_bytes_log2 ^= 0x3;
+          placeholder_length++;
+        }
+      } else {
+        // 'tl: Logical operations
+        size_in_bytes_log2 = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+      }
+      break;
+    case 'm':  // 'tmsz
+      VIXL_ASSERT(strncmp(format, "tmsz", 4) == 0);
+      placeholder_length += 3;
+      size_in_bytes_log2 = instr->ExtractBits(24, 23);
+      break;
+    case 's':
+      if (format[2] == 'z') {
+        VIXL_ASSERT((format[3] == 'x') || (format[3] == 's') ||
+                    (format[3] == 'p'));
+        if (format[3] == 'x') {
+          // 'tszx: Indexes.
+          std::pair<int, int> index_and_lane_size =
+              instr->GetSVEPermuteIndexAndLaneSizeLog2();
+          size_in_bytes_log2 = index_and_lane_size.second;
+        } else if (format[3] == 'p') {
+          // 'tszp: Predicated shifts.
+          std::pair<int, int> shift_and_lane_size =
+              instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
+          size_in_bytes_log2 = shift_and_lane_size.second;
+        } else {
+          // 'tszs: Unpredicated shifts.
+          std::pair<int, int> shift_and_lane_size =
+              instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+          size_in_bytes_log2 = shift_and_lane_size.second;
+        }
+        placeholder_length += 3;  // skip `sz[x|s]`
+      }
+      break;
+    case 'h':
+      // Half size of the lane size field.
+      size_in_bytes_log2 -= 1;
+      placeholder_length++;
+      break;
+    case 'q':
+      // Quarter size of the lane size field.
+ size_in_bytes_log2 -= 2; + placeholder_length++; + break; + default: + break; + } + + VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(sizes)); + AppendToOutput("%c", sizes[size_in_bytes_log2]); + + return placeholder_length; +} + +int Disassembler::SubstituteTernary(const Instruction *instr, + const char *format) { + VIXL_ASSERT((format[0] == '?') && (format[3] == ':')); + + // The ternary substitution of the format "'?bb:TF" is replaced by a single + // character, either T or F, depending on the value of the bit at position + // bb in the instruction. For example, "'?31:xw" is substituted with "x" if + // bit 31 is true, and "w" otherwise. + VIXL_ASSERT(strspn(&format[1], "0123456789") == 2); + char *c; + uint64_t value = strtoul(&format[1], &c, 10); + VIXL_ASSERT(value < (kInstructionSize * kBitsPerByte)); + VIXL_ASSERT((*c == ':') && (strlen(c) >= 3)); // Minimum of ":TF" + c++; + AppendToOutput("%c", c[1 - instr->ExtractBit(static_cast<int>(value))]); + return 6; +} + void Disassembler::ResetOutput() { buffer_pos_ = 0; buffer_[buffer_pos_] = 0; diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h index c650bee9..b59840aa 100644 --- a/src/aarch64/disasm-aarch64.h +++ b/src/aarch64/disasm-aarch64.h @@ -27,6 +27,8 @@ #ifndef VIXL_AARCH64_DISASM_AARCH64_H #define VIXL_AARCH64_DISASM_AARCH64_H +#include <utility> + #include "../globals-vixl.h" #include "../utils-vixl.h" @@ -112,10 +114,13 @@ class Disassembler : public DecoderVisitor { private: void Format(const Instruction* instr, const char* mnemonic, - const char* format); + const char* format0, + const char* format1 = NULL); void Substitute(const Instruction* instr, const char* string); int SubstituteField(const Instruction* instr, const char* format); int SubstituteRegisterField(const Instruction* instr, const char* format); + int SubstitutePredicateRegisterField(const Instruction* instr, + const char* format); int SubstituteImmediateField(const Instruction* instr, const char* format); int SubstituteLiteralField(const Instruction* instr, const char* format); int SubstituteBitfieldImmediateField(const Instruction* instr, @@ -130,6 +135,14 @@ class Disassembler : public DecoderVisitor { int SubstituteBarrierField(const Instruction* instr, const char* format); int SubstituteSysOpField(const Instruction* instr, const char* format); int SubstituteCrField(const Instruction* instr, const char* format); + int SubstituteIntField(const Instruction* instr, const char* format); + int SubstituteSVESize(const Instruction* instr, const char* format); + int SubstituteTernary(const Instruction* instr, const char* format); + + std::pair<unsigned, unsigned> GetRegNumForField(const Instruction* instr, + char reg_prefix, + const char* field); + bool RdIsZROrSP(const Instruction* instr) const { return (instr->GetRd() == kZeroRegCode); } diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc index a99a0459..b3e28384 100644 --- a/src/aarch64/instructions-aarch64.cc +++ b/src/aarch64/instructions-aarch64.cc @@ -35,7 +35,8 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, unsigned width) { VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) || (width == 32)); - VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); + VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) || + (reg_size == kSRegSize) || (reg_size == kDRegSize)); uint64_t result = value & ((UINT64_C(1) << width) - 1); for (unsigned i = width; i < reg_size; i *= 2) { result |= (result << i); @@ -43,6 
+44,503 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size, return result; } +bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const { + bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z; + bool movprfx_is_unpredicated = + movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z; + VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated); + + int movprfx_zd = movprfx->GetRd(); + int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1; + VectorFormat movprfx_vform = + movprfx_is_predicated ? movprfx->GetSVEVectorFormat() : kFormatUndefined; + + bool pg_matches_low8 = movprfx_pg == GetPgLow8(); + bool vform_matches = movprfx_vform == GetSVEVectorFormat(); + bool zd_matches = movprfx_zd == GetRd(); + bool zd_matches_zm = movprfx_zd == GetRm(); + bool zd_matches_zn = movprfx_zd == GetRn(); + + switch (Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) { + case AND_z_zi: + case EOR_z_zi: + case ORR_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEBitwiseLogical_PredicatedMask)) { + case AND_z_p_zz: + case BIC_z_p_zz: + case EOR_z_p_zz: + case ORR_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEBitwiseShiftByImm_PredicatedMask)) { + case ASRD_z_p_zi: + case ASR_z_p_zi: + case LSL_z_p_zi: + case LSR_z_p_zi: + if (movprfx_is_predicated) { + if (!pg_matches_low8) return false; + unsigned tsz = ExtractBits<0x00c00300>(); + VectorFormat instr_vform = + SVEFormatFromLaneSizeInBytesLog2(HighestSetBitPosition(tsz)); + if (movprfx_vform != instr_vform) return false; + } + return zd_matches; + } + switch (Mask(SVEBitwiseShiftByVector_PredicatedMask)) { + case ASRR_z_p_zz: + case ASR_z_p_zz: + case LSLR_z_p_zz: + case LSL_z_p_zz: + case LSRR_z_p_zz: + case LSR_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { + case ASR_z_p_zw: + case LSL_z_p_zw: + case LSR_z_p_zw: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEConditionallyBroadcastElementToVectorMask)) { + case CLASTA_z_p_zz: + case CLASTB_z_p_zz: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: + if (movprfx_is_predicated) { + if (!vform_matches) return false; + if (movprfx_pg != GetRx<19, 16>()) return false; + } + return zd_matches; + } + switch (Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { + case CPY_z_p_r: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches; + } + switch (Mask(SVECopyIntImm_PredicatedMask)) { + case CPY_z_p_i: + if (movprfx_is_predicated) { + if (!vform_matches) return false; + if (movprfx_pg != GetRx<19, 16>()) return false; + } + // Only the merging form can take movprfx. 
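+      // (Bit 14 is the M bit for CPY_z_p_i: 1 selects /m, 0 selects /z.)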
+ if (ExtractBit(14) == 0) return false; + return zd_matches; + } + switch (Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { + case CPY_z_p_v: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPArithmeticWithImm_PredicatedMask)) { + case FADD_z_p_zs: + case FMAXNM_z_p_zs: + case FMAX_z_p_zs: + case FMINNM_z_p_zs: + case FMIN_z_p_zs: + case FMUL_z_p_zs: + case FSUBR_z_p_zs: + case FSUB_z_p_zs: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches; + } + switch (Mask(SVEFPArithmetic_PredicatedMask)) { + case FABD_z_p_zz: + case FADD_z_p_zz: + case FDIVR_z_p_zz: + case FDIV_z_p_zz: + case FMAXNM_z_p_zz: + case FMAX_z_p_zz: + case FMINNM_z_p_zz: + case FMIN_z_p_zz: + case FMULX_z_p_zz: + case FMUL_z_p_zz: + case FSCALE_z_p_zz: + case FSUBR_z_p_zz: + case FSUB_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEFPComplexAdditionMask)) { + case FCADD_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEFPComplexMulAddIndexMask)) { + case FCMLA_z_zzzi_h: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<18, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + case FCMLA_z_zzzi_s: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<19, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPComplexMulAddMask)) { + case FCMLA_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zm && !zd_matches_zn; + } + switch (Mask(SVEFPConvertPrecisionMask)) { + case FCVT_z_p_z_d2h: + case FCVT_z_p_z_d2s: + case FCVT_z_p_z_h2d: + case FCVT_z_p_z_h2s: + case FCVT_z_p_z_s2d: + case FCVT_z_p_z_s2h: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPConvertToIntMask)) { + case FCVTZS_z_p_z_d2w: + case FCVTZS_z_p_z_d2x: + case FCVTZS_z_p_z_fp162h: + case FCVTZS_z_p_z_fp162w: + case FCVTZS_z_p_z_fp162x: + case FCVTZS_z_p_z_s2w: + case FCVTZS_z_p_z_s2x: + case FCVTZU_z_p_z_d2w: + case FCVTZU_z_p_z_d2x: + case FCVTZU_z_p_z_fp162h: + case FCVTZU_z_p_z_fp162w: + case FCVTZU_z_p_z_fp162x: + case FCVTZU_z_p_z_s2w: + case FCVTZU_z_p_z_s2x: + if (movprfx_is_predicated) { + if (!pg_matches_low8) return false; + // The movprfx element size must match the instruction's maximum encoded + // element size. We have to partially decode the opc and opc2 fields to + // find this. 
+ unsigned opc = ExtractBits(23, 22); + unsigned opc2 = ExtractBits(18, 17); + VectorFormat instr_vform = + SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); + if (movprfx_vform != instr_vform) return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPMulAddIndexMask)) { + case FMLA_z_zzzi_h: + case FMLA_z_zzzi_h_i3h: + case FMLA_z_zzzi_s: + case FMLS_z_zzzi_h: + case FMLS_z_zzzi_h_i3h: + case FMLS_z_zzzi_s: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<18, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + case FMLA_z_zzzi_d: + case FMLS_z_zzzi_d: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<19, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPMulAddMask)) { + case FMAD_z_p_zzz: + case FMSB_z_p_zzz: + case FNMAD_z_p_zzz: + case FNMSB_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<20, 16>()) return false; + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + case FMLA_z_p_zzz: + case FMLS_z_p_zzz: + case FNMLA_z_p_zzz: + case FNMLS_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zm && !zd_matches_zn; + } + switch (Mask(SVEFPRoundToIntegralValueMask)) { + case FRINTA_z_p_z: + case FRINTI_z_p_z: + case FRINTM_z_p_z: + case FRINTN_z_p_z: + case FRINTP_z_p_z: + case FRINTX_z_p_z: + case FRINTZ_z_p_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEFPTrigMulAddCoefficientMask)) { + case FTMAD_z_zzi: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEFPUnaryOpMask)) { + case FRECPX_z_p_z: + case FSQRT_z_p_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEIncDecByPredicateCountMask)) { + case DECP_z_p_z: + case INCP_z_p_z: + case SQDECP_z_p_z: + case SQINCP_z_p_z: + case UQDECP_z_p_z: + case UQINCP_z_p_z: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIncDecVectorByElementCountMask)) { + case DECD_z_zs: + case DECH_z_zs: + case DECW_z_zs: + case INCD_z_zs: + case INCH_z_zs: + case INCW_z_zs: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEInsertGeneralRegisterMask)) { + case INSR_z_r: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEInsertSIMDFPScalarRegisterMask)) { + case INSR_z_v: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { + case ADD_z_zi: + case SQADD_z_zi: + case SQSUB_z_zi: + case SUBR_z_zi: + case SUB_z_zi: + case UQADD_z_zi: + case UQSUB_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntAddSubtractVectors_PredicatedMask)) { + case ADD_z_p_zz: + case SUBR_z_p_zz: + case SUB_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. 
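+      // (Bits <9:5> hold the Zm operand in these destructive forms, so
+      // this rejects a movprfx destination that aliases a source.)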
+ if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntConvertToFPMask)) { + case SCVTF_z_p_z_h2fp16: + case SCVTF_z_p_z_w2d: + case SCVTF_z_p_z_w2fp16: + case SCVTF_z_p_z_w2s: + case SCVTF_z_p_z_x2d: + case SCVTF_z_p_z_x2fp16: + case SCVTF_z_p_z_x2s: + case UCVTF_z_p_z_h2fp16: + case UCVTF_z_p_z_w2d: + case UCVTF_z_p_z_w2fp16: + case UCVTF_z_p_z_w2s: + case UCVTF_z_p_z_x2d: + case UCVTF_z_p_z_x2fp16: + case UCVTF_z_p_z_x2s: + if (movprfx_is_predicated) { + if (!pg_matches_low8) return false; + // The movprfx element size must match the instruction's maximum encoded + // element size. We have to partially decode the opc and opc2 fields to + // find this. + unsigned opc = ExtractBits(23, 22); + unsigned opc2 = ExtractBits(18, 17); + VectorFormat instr_vform = + SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2)); + if (movprfx_vform != instr_vform) return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + case SDIV_z_p_zz: + case UDIVR_z_p_zz: + case UDIV_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntMinMaxDifference_PredicatedMask)) { + case SABD_z_p_zz: + case SMAX_z_p_zz: + case SMIN_z_p_zz: + case UABD_z_p_zz: + case UMAX_z_p_zz: + case UMIN_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntMinMaxImm_UnpredicatedMask)) { + case SMAX_z_zi: + case SMIN_z_zi: + case UMAX_z_zi: + case UMIN_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntMulAddPredicatedMask)) { + case MAD_z_p_zzz: + case MSB_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches && !zd_matches_zm; + case MLA_z_p_zzz: + case MLS_z_p_zzz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zm && !zd_matches_zn; + } + switch (Mask(SVEIntMulAddUnpredicatedMask)) { + case SDOT_z_zzz: + case UDOT_z_zzz: + return movprfx_is_unpredicated && zd_matches && !zd_matches_zm && + !zd_matches_zn; + } + switch (Mask(SVEIntMulImm_UnpredicatedMask)) { + case MUL_z_zi: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEIntMulVectors_PredicatedMask)) { + case MUL_z_p_zz: + case SMULH_z_p_zz: + case UMULH_z_p_zz: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + // The movprfx's `zd` must not alias any other inputs. 
+ if (movprfx_zd == GetRx<9, 5>()) return false; + return zd_matches; + } + switch (Mask(SVEIntUnaryArithmeticPredicatedMask)) { + case ABS_z_p_z: + case CLS_z_p_z: + case CLZ_z_p_z: + case CNOT_z_p_z: + case CNT_z_p_z: + case FABS_z_p_z: + case FNEG_z_p_z: + case NEG_z_p_z: + case NOT_z_p_z: + case SXTB_z_p_z: + case SXTH_z_p_z: + case SXTW_z_p_z: + case UXTB_z_p_z: + case UXTH_z_p_z: + case UXTW_z_p_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVEMulIndexMask)) { + case SDOT_z_zzzi_s: + case UDOT_z_zzzi_s: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<18, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + case SDOT_z_zzzi_d: + case UDOT_z_zzzi_d: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<19, 16>()) return false; + return movprfx_is_unpredicated && zd_matches && !zd_matches_zn; + } + switch (Mask(SVEPermuteVectorExtractMask)) { + case EXT_z_zi_des: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEReverseWithinElementsMask)) { + case RBIT_z_p_z: + case REVB_z_z: + case REVH_z_z: + case REVW_z_z: + if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) { + return false; + } + return zd_matches && !zd_matches_zn; + } + switch (Mask(SVESaturatingIncDecVectorByElementCountMask)) { + case SQDECD_z_zs: + case SQDECH_z_zs: + case SQDECW_z_zs: + case SQINCD_z_zs: + case SQINCH_z_zs: + case SQINCW_z_zs: + case UQDECD_z_zs: + case UQDECH_z_zs: + case UQDECW_z_zs: + case UQINCD_z_zs: + case UQINCH_z_zs: + case UQINCW_z_zs: + return movprfx_is_unpredicated && zd_matches; + } + switch (Mask(SVEVectorSplice_DestructiveMask)) { + case SPLICE_z_p_zz_des: + // The movprfx's `zd` must not alias any other inputs. + if (movprfx_zd == GetRx<9, 5>()) return false; + return movprfx_is_unpredicated && zd_matches; + } + return false; +} // NOLINT(readability/fn_size) bool Instruction::IsLoad() const { if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { @@ -103,6 +601,16 @@ bool Instruction::IsStore() const { } +std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const { + uint32_t imm_2 = ExtractBits<0x00C00000>(); + uint32_t tsz_5 = ExtractBits<0x001F0000>(); + uint32_t imm_7 = (imm_2 << 5) | tsz_5; + int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5); + int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7); + return std::make_pair(index, lane_size_in_byte_log_2); +} + + // Logical immediates can't encode zero, so a return value of zero is used to // indicate a failure case. Specifically, where the constraints on imm_s are // not met. @@ -111,7 +619,108 @@ uint64_t Instruction::GetImmLogical() const { int32_t n = GetBitN(); int32_t imm_s = GetImmSetBits(); int32_t imm_r = GetImmRotate(); + return DecodeImmBitMask(n, imm_s, imm_r, reg_size); +} + +// Logical immediates can't encode zero, so a return value of zero is used to +// indicate a failure case. Specifically, where the constraints on imm_s are +// not met. 
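+// The SVE variant below differs from GetImmLogical() only in that the lane
+// size is itself decoded from the {n, imm_s} fields; see
+// GetSVEBitwiseImmLaneSizeInBytesLog2() for that mapping.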
+uint64_t Instruction::GetSVEImmLogical() const {
+  int n = GetSVEBitN();
+  int imm_s = GetSVEImmSetBits();
+  int imm_r = GetSVEImmRotate();
+  int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2();
+  switch (lane_size_in_bytes_log2) {
+    case kDRegSizeInBytesLog2:
+    case kSRegSizeInBytesLog2:
+    case kHRegSizeInBytesLog2:
+    case kBRegSizeInBytesLog2: {
+      int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3);
+      return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits);
+    }
+    default:
+      return 0;
+  }
+}
+
+std::pair<int, int> Instruction::GetSVEImmShiftAndLaneSizeLog2(
+    bool is_predicated) const {
+  Instr tsize =
+      is_predicated ? ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>();
+  Instr imm_3 =
+      is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>();
+  if (tsize == 0) {
+    // The bit field `tsize` means undefined if it is zero, so return a
+    // convenience value kWMinInt to indicate a failure case.
+    return std::make_pair(kWMinInt, kWMinInt);
+  }
+
+  int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1;
+  int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte;
+  int shift = (2 * esize) - ((tsize << 3) | imm_3);
+  return std::make_pair(shift, lane_size_in_bytes_log_2);
+}
+
+int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const {
+  Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb);
+  if (is_signed) {
+    dtype_h = dtype_h ^ 0x3;
+  }
+  return dtype_h;
+}
+
+int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const {
+  Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb);
+  if (is_signed) {
+    dtype_l = dtype_l ^ 0x3;
+  }
+  return dtype_l;
+}
+
+int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const {
+  int n = GetSVEBitN();
+  int imm_s = GetSVEImmSetBits();
+  unsigned type_bitset =
+      (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width));
+
+  // A lane size is constructed from the n and imm_s bits according to
+  // the following table:
+  //
+  //  N   imms   size
+  //  0  0xxxxx   32
+  //  0  10xxxx   16
+  //  0  110xxx    8
+  //  0  1110xx    8
+  //  0  11110x    8
+  //  1  xxxxxx   64
+
+  if (type_bitset == 0) {
+    // Bail out early since `HighestSetBitPosition` doesn't accept zero
+    // value input.
+    return -1;
+  }
+
+  switch (HighestSetBitPosition(type_bitset)) {
+    case 6:
+      return kDRegSizeInBytesLog2;
+    case 5:
+      return kSRegSizeInBytesLog2;
+    case 4:
+      return kHRegSizeInBytesLog2;
+    case 3:
+    case 2:
+    case 1:
+      return kBRegSizeInBytesLog2;
+    default:
+      // RESERVED encoding.
+ return -1; + } +} + +uint64_t Instruction::DecodeImmBitMask(int32_t n, + int32_t imm_s, + int32_t imm_r, + int32_t size) const { // An integer is constructed from the n, imm_s and imm_r bits according to // the following table: // @@ -146,7 +755,7 @@ uint64_t Instruction::GetImmLogical() const { return 0; } uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1; - return RepeatBitsAcrossReg(reg_size, + return RepeatBitsAcrossReg(size, RotateRight(bits, imm_r & mask, width), width); } @@ -397,8 +1006,6 @@ void Instruction::SetImmLLiteral(const Instruction* source) { VectorFormat VectorFormatHalfWidth(VectorFormat vform) { - VIXL_ASSERT(vform == kFormat8H || vform == kFormat4S || vform == kFormat2D || - vform == kFormatH || vform == kFormatS || vform == kFormatD); switch (vform) { case kFormat8H: return kFormat8B; @@ -412,6 +1019,13 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) { return kFormatH; case kFormatD: return kFormatS; + case kFormatVnH: + return kFormatVnB; + case kFormatVnS: + return kFormatVnH; + case kFormatVnD: + return kFormatVnS; + break; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -480,6 +1094,12 @@ VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) { return kFormat2S; case kFormat2D: return kFormat4S; + case kFormatVnH: + return kFormatVnB; + case kFormatVnS: + return kFormatVnH; + case kFormatVnD: + return kFormatVnS; default: VIXL_UNREACHABLE(); return kFormatUndefined; @@ -518,8 +1138,8 @@ VectorFormat VectorFormatHalfLanes(VectorFormat vform) { } -VectorFormat ScalarFormatFromLaneSize(int laneSize) { - switch (laneSize) { +VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) { + switch (lane_size_in_bits) { case 8: return kFormatB; case 16: @@ -535,6 +1155,69 @@ VectorFormat ScalarFormatFromLaneSize(int laneSize) { } +bool IsSVEFormat(VectorFormat vform) { + switch (vform) { + case kFormatVnB: + case kFormatVnH: + case kFormatVnS: + case kFormatVnD: + case kFormatVnQ: + return true; + default: + return false; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) { + switch (lane_size_in_bytes) { + case 1: + return kFormatVnB; + case 2: + return kFormatVnH; + case 4: + return kFormatVnS; + case 8: + return kFormatVnD; + case 16: + return kFormatVnQ; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) { + switch (lane_size_in_bits) { + case 8: + case 16: + case 32: + case 64: + case 128: + return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte); + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) { + switch (lane_size_in_bytes_log2) { + case 0: + case 1: + case 2: + case 3: + case 4: + return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2); + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + VectorFormat ScalarFormatFromFormat(VectorFormat vform) { return ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); } @@ -542,6 +1225,7 @@ VectorFormat ScalarFormatFromFormat(VectorFormat vform) { unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { VIXL_ASSERT(vform != kFormatUndefined); + VIXL_ASSERT(!IsSVEFormat(vform)); switch (vform) { case kFormatB: return kBRegSize; @@ -551,14 +1235,19 @@ unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { case kFormat2H: return kSRegSize; case kFormatD: - return kDRegSize; case kFormat8B: case kFormat4H: case kFormat2S: case 
kFormat1D:
       return kDRegSize;
-    default:
+    case kFormat16B:
+    case kFormat8H:
+    case kFormat4S:
+    case kFormat2D:
       return kQRegSize;
+    default:
+      VIXL_UNREACHABLE();
+      return 0;
   }
 }
@@ -574,20 +1263,26 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
     case kFormatB:
     case kFormat8B:
     case kFormat16B:
+    case kFormatVnB:
       return 8;
     case kFormatH:
     case kFormat2H:
     case kFormat4H:
     case kFormat8H:
+    case kFormatVnH:
       return 16;
     case kFormatS:
     case kFormat2S:
     case kFormat4S:
+    case kFormatVnS:
       return 32;
     case kFormatD:
     case kFormat1D:
     case kFormat2D:
+    case kFormatVnD:
       return 64;
+    case kFormatVnQ:
+      return 128;
     default:
       VIXL_UNREACHABLE();
       return 0;
@@ -606,20 +1301,26 @@ int LaneSizeInBytesLog2FromFormat(VectorFormat vform) {
     case kFormatB:
     case kFormat8B:
    case kFormat16B:
+    case kFormatVnB:
      return 0;
    case kFormatH:
    case kFormat2H:
    case kFormat4H:
    case kFormat8H:
+    case kFormatVnH:
      return 1;
    case kFormatS:
    case kFormat2S:
    case kFormat4S:
+    case kFormatVnS:
      return 2;
    case kFormatD:
    case kFormat1D:
    case kFormat2D:
+    case kFormatVnD:
      return 3;
+    case kFormatVnQ:
+      return 4;
    default:
      VIXL_UNREACHABLE();
      return 0;
@@ -697,17 +1398,19 @@ bool IsVectorFormat(VectorFormat vform) {
 
 
 int64_t MaxIntFromFormat(VectorFormat vform) {
-  return INT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+  int lane_size = LaneSizeInBitsFromFormat(vform);
+  return static_cast<int64_t>(GetUintMask(lane_size) >> 1);
 }
 
 
 int64_t MinIntFromFormat(VectorFormat vform) {
-  return INT64_MIN >> (64 - LaneSizeInBitsFromFormat(vform));
+  return -MaxIntFromFormat(vform) - 1;
 }
 
 
 uint64_t MaxUintFromFormat(VectorFormat vform) {
-  return UINT64_MAX >> (64 - LaneSizeInBitsFromFormat(vform));
+  return GetUintMask(LaneSizeInBitsFromFormat(vform));
 }
 
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h
index 6d4f96b4..5f56ae16 100644
--- a/src/aarch64/instructions-aarch64.h
+++ b/src/aarch64/instructions-aarch64.h
@@ -81,6 +81,7 @@ const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff);
 const uint64_t kHRegMask = UINT64_C(0xffff);
 const uint64_t kSRegMask = UINT64_C(0xffffffff);
 const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff);
+const uint64_t kHSignMask = UINT64_C(0x8000);
 const uint64_t kSSignMask = UINT64_C(0x80000000);
 const uint64_t kDSignMask = UINT64_C(0x8000000000000000);
 const uint64_t kWSignMask = UINT64_C(0x80000000);
@@ -116,6 +117,30 @@ VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
 
 const uint64_t kTTBRMask = UINT64_C(1) << 55;
 
+// We can't define a static kZRegSize because the size depends on the
+// implementation. However, it is sometimes useful to know the minimum and
+// maximum possible sizes.
+const unsigned kZRegMinSize = 128;
+const unsigned kZRegMinSizeLog2 = 7;
+const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8;
+const unsigned kZRegMinSizeInBytesLog2 = kZRegMinSizeLog2 - 3;
+const unsigned kZRegMaxSize = 2048;
+const unsigned kZRegMaxSizeLog2 = 11;
+const unsigned kZRegMaxSizeInBytes = kZRegMaxSize / 8;
+const unsigned kZRegMaxSizeInBytesLog2 = kZRegMaxSizeLog2 - 3;
+
+// The P register size depends on the Z register size.
+const unsigned kZRegBitsPerPRegBit = kBitsPerByte; +const unsigned kZRegBitsPerPRegBitLog2 = 3; +const unsigned kPRegMinSize = kZRegMinSize / kZRegBitsPerPRegBit; +const unsigned kPRegMinSizeLog2 = kZRegMinSizeLog2 - 3; +const unsigned kPRegMinSizeInBytes = kPRegMinSize / 8; +const unsigned kPRegMinSizeInBytesLog2 = kPRegMinSizeLog2 - 3; +const unsigned kPRegMaxSize = kZRegMaxSize / kZRegBitsPerPRegBit; +const unsigned kPRegMaxSizeLog2 = kZRegMaxSizeLog2 - 3; +const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8; +const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3; + // Make these moved float constants backwards compatible // with explicit vixl::aarch64:: namespace references. using vixl::kDoubleMantissaBits; @@ -151,6 +176,44 @@ enum AddrMode { Offset, PreIndex, PostIndex }; enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister }; +enum VectorFormat { + kFormatUndefined = 0xffffffff, + kFormat8B = NEON_8B, + kFormat16B = NEON_16B, + kFormat4H = NEON_4H, + kFormat8H = NEON_8H, + kFormat2S = NEON_2S, + kFormat4S = NEON_4S, + kFormat1D = NEON_1D, + kFormat2D = NEON_2D, + + // Scalar formats. We add the scalar bit to distinguish between scalar and + // vector enumerations; the bit is always set in the encoding of scalar ops + // and always clear for vector ops. Although kFormatD and kFormat1D appear + // to be the same, their meaning is subtly different. The first is a scalar + // operation, the second a vector operation that only affects one lane. + kFormatB = NEON_B | NEONScalar, + kFormatH = NEON_H | NEONScalar, + kFormatS = NEON_S | NEONScalar, + kFormatD = NEON_D | NEONScalar, + + // An artificial value, used to distinguish from NEON format category. + kFormatSVE = 0x0000fffd, + // An artificial value. Q lane size isn't encoded in the usual size field. + kFormatSVEQ = 0x000f0000, + // Vector element width of SVE register with the unknown lane count since + // the vector length is implementation dependent. + kFormatVnB = SVE_B | kFormatSVE, + kFormatVnH = SVE_H | kFormatSVE, + kFormatVnS = SVE_S | kFormatSVE, + kFormatVnD = SVE_D | kFormatSVE, + kFormatVnQ = kFormatSVEQ | kFormatSVE, + + // An artificial value, used by simulator trace tests and a few oddball + // instructions (such as FMLAL). + kFormat2H = 0xfffffffe +}; + // Instructions. --------------------------------------------------------------- class Instruction { @@ -229,6 +292,29 @@ class Instruction { INSTRUCTION_FIELDS_LIST(DEFINE_GETTER) #undef DEFINE_GETTER + template <int msb, int lsb> + int32_t GetRx() const { + // We don't have any register fields wider than five bits, so the result + // will always fit into an int32_t. + VIXL_ASSERT((msb - lsb + 1) <= 5); + return this->ExtractBits(msb, lsb); + } + + VectorFormat GetSVEVectorFormat() const { + switch (Mask(SVESizeFieldMask)) { + case SVE_B: + return kFormatVnB; + case SVE_H: + return kFormatVnH; + case SVE_S: + return kFormatVnS; + case SVE_D: + return kFormatVnD; + } + VIXL_UNREACHABLE(); + return kFormatUndefined; + } + // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST), // formed from ImmPCRelLo and ImmPCRelHi. 
int GetImmPCRel() const { @@ -254,6 +340,20 @@ class Instruction { VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) { return GetImmLogical(); } + uint64_t GetSVEImmLogical() const; + int GetSVEBitwiseImmLaneSizeInBytesLog2() const; + uint64_t DecodeImmBitMask(int32_t n, + int32_t imm_s, + int32_t imm_r, + int32_t size) const; + + std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const; + + std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const; + + int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const; + + int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const; unsigned GetImmNEONabcdefgh() const; VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) { @@ -280,6 +380,16 @@ class Instruction { return GetImmNEONFP64(); } + Float16 GetSVEImmFP16() const { return Imm8ToFloat16(ExtractBits(12, 5)); } + + float GetSVEImmFP32() const { return Imm8ToFP32(ExtractBits(12, 5)); } + + double GetSVEImmFP64() const { return Imm8ToFP64(ExtractBits(12, 5)); } + + static Float16 Imm8ToFloat16(uint32_t imm8); + static float Imm8ToFP32(uint32_t imm8); + static double Imm8ToFP64(uint32_t imm8); + unsigned GetSizeLS() const { return CalcLSDataSize(static_cast<LoadStoreOp>(Mask(LoadStoreMask))); } @@ -342,6 +452,9 @@ class Instruction { return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed; } + // True if `this` is valid immediately after the provided movprfx instruction. + bool CanTakeSVEMovprfx(Instruction const* movprfx) const; + bool IsLoad() const; bool IsStore() const; @@ -557,41 +670,12 @@ class Instruction { private: int GetImmBranch() const; - static Float16 Imm8ToFloat16(uint32_t imm8); - static float Imm8ToFP32(uint32_t imm8); - static double Imm8ToFP64(uint32_t imm8); - void SetPCRelImmTarget(const Instruction* target); void SetBranchImmTarget(const Instruction* target); }; -// Functions for handling NEON vector format information. -enum VectorFormat { - kFormatUndefined = 0xffffffff, - kFormat8B = NEON_8B, - kFormat16B = NEON_16B, - kFormat4H = NEON_4H, - kFormat8H = NEON_8H, - kFormat2S = NEON_2S, - kFormat4S = NEON_4S, - kFormat1D = NEON_1D, - kFormat2D = NEON_2D, - - // Scalar formats. We add the scalar bit to distinguish between scalar and - // vector enumerations; the bit is always set in the encoding of scalar ops - // and always clear for vector ops. Although kFormatD and kFormat1D appear - // to be the same, their meaning is subtly different. The first is a scalar - // operation, the second a vector operation that only affects one lane. - kFormatB = NEON_B | NEONScalar, - kFormatH = NEON_H | NEONScalar, - kFormatS = NEON_S | NEONScalar, - kFormatD = NEON_D | NEONScalar, - - // An artificial value, used by simulator trace tests and a few oddball - // instructions (such as FMLAL). - kFormat2H = 0xfffffffe -}; +// Functions for handling NEON and SVE vector format information. 
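A side note on the MaxIntFromFormat/MinIntFromFormat/MaxUintFromFormat rewrite earlier in this patch: deriving the lane extremes from an unsigned mask reads more directly than the old INT64_MAX/INT64_MIN shifts, and presumably also sidesteps right-shifting a negative value, which was implementation-defined before C++20. A self-contained sketch of the same arithmetic (the local GetUintMask stand-in mirrors VIXL's helper of the same name so the example compiles alone):

#include <cassert>
#include <cstdint>

// Local stand-in for VIXL's GetUintMask(), defined here so the sketch is
// self-contained.
static uint64_t GetUintMask(unsigned bits) {
  return (bits >= 64) ? UINT64_MAX : ((UINT64_C(1) << bits) - 1);
}

int main() {
  // Lane extremes for a 16-bit lane, mirroring the rewritten helpers:
  unsigned lane_size = 16;
  int64_t max_int = static_cast<int64_t>(GetUintMask(lane_size) >> 1);
  int64_t min_int = -max_int - 1;
  uint64_t max_uint = GetUintMask(lane_size);
  assert(max_int == 0x7fff);
  assert(min_int == -0x8000);
  assert(max_uint == 0xffff);
  return 0;
}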
const int kMaxLanesPerVector = 16;
@@ -599,12 +683,16 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform);
 VectorFormat VectorFormatDoubleWidth(VectorFormat vform);
 VectorFormat VectorFormatDoubleLanes(VectorFormat vform);
 VectorFormat VectorFormatHalfLanes(VectorFormat vform);
-VectorFormat ScalarFormatFromLaneSize(int lanesize);
+VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits);
 VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform);
 VectorFormat VectorFormatFillQ(VectorFormat vform);
 VectorFormat ScalarFormatFromFormat(VectorFormat vform);
+VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits);
+VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes);
+VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log_2);
 unsigned RegisterSizeInBitsFromFormat(VectorFormat vform);
 unsigned RegisterSizeInBytesFromFormat(VectorFormat vform);
+bool IsSVEFormat(VectorFormat vform);
 // TODO: Make the return types of these functions consistent.
 unsigned LaneSizeInBitsFromFormat(VectorFormat vform);
 int LaneSizeInBytesFromFormat(VectorFormat vform);
diff --git a/src/aarch64/instrument-aarch64.cc b/src/aarch64/instrument-aarch64.cc
deleted file mode 100644
index 7cb6b20e..00000000
--- a/src/aarch64/instrument-aarch64.cc
+++ /dev/null
@@ -1,975 +0,0 @@
-// Copyright 2014, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "instrument-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-Counter::Counter(const char* name, CounterType type)
-    : count_(0), enabled_(false), type_(type) {
-  VIXL_ASSERT(name != NULL);
-  strncpy(name_, name, kCounterNameMaxLength - 1);
-  // Make sure `name_` is always NULL-terminated, even if the source string
-  // is longer.
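// (Aside on the idiom being deleted here: strncpy() only writes a terminator
// when the source is shorter than the count, so the constructor copies at
// most kCounterNameMaxLength - 1 characters and then stores the '\0' itself.)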
- name_[kCounterNameMaxLength - 1] = '\0'; -} - - -void Counter::Enable() { enabled_ = true; } - - -void Counter::Disable() { enabled_ = false; } - - -bool Counter::IsEnabled() { return enabled_; } - - -void Counter::Increment() { - if (enabled_) { - count_++; - } -} - - -uint64_t Counter::GetCount() { - uint64_t result = count_; - if (type_ == Gauge) { - // If the counter is a Gauge, reset the count after reading. - count_ = 0; - } - return result; -} - - -const char* Counter::GetName() { return name_; } - - -CounterType Counter::GetType() { return type_; } - - -struct CounterDescriptor { - const char* name; - CounterType type; -}; - - -static const CounterDescriptor kCounterList[] = - {{"Instruction", Cumulative}, - - {"Move Immediate", Gauge}, - {"Add/Sub DP", Gauge}, - {"Logical DP", Gauge}, - {"Other Int DP", Gauge}, - {"FP DP", Gauge}, - - {"Conditional Select", Gauge}, - {"Conditional Compare", Gauge}, - - {"Unconditional Branch", Gauge}, - {"Compare and Branch", Gauge}, - {"Test and Branch", Gauge}, - {"Conditional Branch", Gauge}, - - {"Load Integer", Gauge}, - {"Load FP", Gauge}, - {"Load Pair", Gauge}, - {"Load Literal", Gauge}, - - {"Store Integer", Gauge}, - {"Store FP", Gauge}, - {"Store Pair", Gauge}, - - {"PC Addressing", Gauge}, - {"Other", Gauge}, - {"NEON", Gauge}, - {"Crypto", Gauge}}; - - -Instrument::Instrument(const char* datafile, uint64_t sample_period) - : output_stream_(stdout), sample_period_(sample_period) { - // Set up the output stream. If datafile is non-NULL, use that file. If it - // can't be opened, or datafile is NULL, use stdout. - if (datafile != NULL) { - output_stream_ = fopen(datafile, "w"); - if (output_stream_ == NULL) { - printf("Can't open output file %s. Using stdout.\n", datafile); - output_stream_ = stdout; - } - } - - static const int num_counters = - sizeof(kCounterList) / sizeof(CounterDescriptor); - - // Dump an instrumentation description comment at the top of the file. - fprintf(output_stream_, "# counters=%d\n", num_counters); - fprintf(output_stream_, "# sample_period=%" PRIu64 "\n", sample_period_); - - // Construct Counter objects from counter description array. - for (int i = 0; i < num_counters; i++) { - Counter* counter = new Counter(kCounterList[i].name, kCounterList[i].type); - counters_.push_back(counter); - } - - DumpCounterNames(); -} - - -Instrument::~Instrument() { - // Dump any remaining instruction data to the output file. - DumpCounters(); - - // Free all the counter objects. - std::list<Counter*>::iterator it; - for (it = counters_.begin(); it != counters_.end(); it++) { - delete *it; - } - - if (output_stream_ != stdout) { - fclose(output_stream_); - } -} - - -void Instrument::Update() { - // Increment the instruction counter, and dump all counters if a sample period - // has elapsed. - static Counter* counter = GetCounter("Instruction"); - VIXL_ASSERT(counter->GetType() == Cumulative); - counter->Increment(); - - if ((sample_period_ != 0) && counter->IsEnabled() && - (counter->GetCount() % sample_period_) == 0) { - DumpCounters(); - } -} - - -void Instrument::DumpCounters() { - // Iterate through the counter objects, dumping their values to the output - // stream. 
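// (For reference, each dump below is one CSV-style row: every counter is
// printed as "<count>," via fprintf, followed by a single newline, matching
// the "# counters=..." header the constructor writes at the top of the file.)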
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    fprintf(output_stream_, "%" PRIu64 ",", (*it)->GetCount());
-  }
-  fprintf(output_stream_, "\n");
-  fflush(output_stream_);
-}
-
-
-void Instrument::DumpCounterNames() {
-  // Iterate through the counter objects, dumping the counter names to the
-  // output stream.
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    fprintf(output_stream_, "%s,", (*it)->GetName());
-  }
-  fprintf(output_stream_, "\n");
-  fflush(output_stream_);
-}
-
-
-void Instrument::HandleInstrumentationEvent(unsigned event) {
-  switch (event) {
-    case InstrumentStateEnable:
-      Enable();
-      break;
-    case InstrumentStateDisable:
-      Disable();
-      break;
-    default:
-      DumpEventMarker(event);
-  }
-}
-
-
-void Instrument::DumpEventMarker(unsigned marker) {
-  // Dump an event marker to the output stream as a specially formatted comment
-  // line.
-  static Counter* counter = GetCounter("Instruction");
-
-  fprintf(output_stream_,
-          "# %c%c @ %" PRId64 "\n",
-          marker & 0xff,
-          (marker >> 8) & 0xff,
-          counter->GetCount());
-}
-
-
-Counter* Instrument::GetCounter(const char* name) {
-  // Get a Counter object by name from the counter list.
-  std::list<Counter*>::const_iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    if (strcmp((*it)->GetName(), name) == 0) {
-      return *it;
-    }
-  }
-
-  // A Counter by that name does not exist: print an error message to stderr
-  // and the output file, and exit.
-  static const char* error_message =
-      "# Error: Unknown counter \"%s\". Exiting.\n";
-  fprintf(stderr, error_message, name);
-  fprintf(output_stream_, error_message, name);
-  exit(1);
-}
-
-
-void Instrument::Enable() {
-  std::list<Counter*>::iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    (*it)->Enable();
-  }
-}
-
-
-void Instrument::Disable() {
-  std::list<Counter*>::iterator it;
-  for (it = counters_.begin(); it != counters_.end(); it++) {
-    (*it)->Disable();
-  }
-}
-
-
-void Instrument::VisitPCRelAddressing(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("PC Addressing");
-  counter->Increment();
-}
-
-
-void Instrument::VisitAddSubImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Add/Sub DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitLogicalImmediate(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Logical DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitMoveWideImmediate(const Instruction* instr) {
-  Update();
-  static Counter* counter = GetCounter("Move Immediate");
-
-  if (instr->IsMovn() && (instr->GetRd() == kZeroRegCode)) {
-    unsigned imm = instr->GetImmMoveWide();
-    HandleInstrumentationEvent(imm);
-  } else {
-    counter->Increment();
-  }
-}
-
-
-void Instrument::VisitBitfield(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitExtract(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other Int DP");
-  counter->Increment();
-}
-
-
-void Instrument::VisitUnconditionalBranch(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Unconditional Branch");
-  counter->Increment();
-}
-
-
-void Instrument::VisitUnconditionalBranchToRegister(const Instruction* instr) {
-  USE(instr);
- Update(); - static Counter* counter = GetCounter("Unconditional Branch"); - counter->Increment(); -} - - -void Instrument::VisitCompareBranch(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Compare and Branch"); - counter->Increment(); -} - - -void Instrument::VisitTestBranch(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Test and Branch"); - counter->Increment(); -} - - -void Instrument::VisitConditionalBranch(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Branch"); - counter->Increment(); -} - - -void Instrument::VisitSystem(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitException(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::InstrumentLoadStorePair(const Instruction* instr) { - static Counter* load_pair_counter = GetCounter("Load Pair"); - static Counter* store_pair_counter = GetCounter("Store Pair"); - - if (instr->Mask(LoadStorePairLBit) != 0) { - load_pair_counter->Increment(); - } else { - store_pair_counter->Increment(); - } -} - - -void Instrument::VisitLoadStorePairPostIndex(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStorePairOffset(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStorePairPreIndex(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStorePairNonTemporal(const Instruction* instr) { - Update(); - InstrumentLoadStorePair(instr); -} - - -void Instrument::VisitLoadStoreExclusive(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitAtomicMemory(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitLoadLiteral(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Load Literal"); - counter->Increment(); -} - - -void Instrument::VisitLoadStorePAC(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Load Integer"); - counter->Increment(); -} - - -void Instrument::InstrumentLoadStore(const Instruction* instr) { - static Counter* load_int_counter = GetCounter("Load Integer"); - static Counter* store_int_counter = GetCounter("Store Integer"); - static Counter* load_fp_counter = GetCounter("Load FP"); - static Counter* store_fp_counter = GetCounter("Store FP"); - - switch (instr->Mask(LoadStoreMask)) { - case STRB_w: - case STRH_w: - case STR_w: - VIXL_FALLTHROUGH(); - case STR_x: - store_int_counter->Increment(); - break; - case STR_s: - VIXL_FALLTHROUGH(); - case STR_d: - store_fp_counter->Increment(); - break; - case LDRB_w: - case LDRH_w: - case LDR_w: - case LDR_x: - case LDRSB_x: - case LDRSH_x: - case LDRSW_x: - case LDRSB_w: - VIXL_FALLTHROUGH(); - case LDRSH_w: - load_int_counter->Increment(); - break; - case LDR_s: - VIXL_FALLTHROUGH(); - case LDR_d: - load_fp_counter->Increment(); - break; - } -} - - -void Instrument::VisitLoadStoreUnscaledOffset(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - - -void 
Instrument::VisitLoadStorePostIndex(const Instruction* instr) { - USE(instr); - Update(); - InstrumentLoadStore(instr); -} - - -void Instrument::VisitLoadStorePreIndex(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - - -void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - -void Instrument::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) { - Update(); - switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) { - case STLURB: - case STLURH: - case STLUR_w: - case STLUR_x: { - static Counter* counter = GetCounter("Store Integer"); - counter->Increment(); - break; - } - case LDAPURB: - case LDAPURSB_w: - case LDAPURSB_x: - case LDAPURH: - case LDAPURSH_w: - case LDAPURSH_x: - case LDAPUR_w: - case LDAPURSW: - case LDAPUR_x: { - static Counter* counter = GetCounter("Load Integer"); - counter->Increment(); - break; - } - } -} - - -void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) { - Update(); - InstrumentLoadStore(instr); -} - - -void Instrument::VisitLogicalShifted(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Logical DP"); - counter->Increment(); -} - - -void Instrument::VisitAddSubShifted(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Add/Sub DP"); - counter->Increment(); -} - - -void Instrument::VisitAddSubExtended(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Add/Sub DP"); - counter->Increment(); -} - - -void Instrument::VisitAddSubWithCarry(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Add/Sub DP"); - counter->Increment(); -} - - -void Instrument::VisitRotateRightIntoFlags(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitEvaluateIntoFlags(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitConditionalCompareRegister(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Compare"); - counter->Increment(); -} - - -void Instrument::VisitConditionalCompareImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Compare"); - counter->Increment(); -} - - -void Instrument::VisitConditionalSelect(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Select"); - counter->Increment(); -} - - -void Instrument::VisitDataProcessing1Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other Int DP"); - counter->Increment(); -} - - -void Instrument::VisitDataProcessing2Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other Int DP"); - counter->Increment(); -} - - -void Instrument::VisitDataProcessing3Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other Int DP"); - counter->Increment(); -} - - -void Instrument::VisitFPCompare(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPConditionalCompare(const Instruction* instr) { - USE(instr); - Update(); - static Counter* 
counter = GetCounter("Conditional Compare"); - counter->Increment(); -} - - -void Instrument::VisitFPConditionalSelect(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Conditional Select"); - counter->Increment(); -} - - -void Instrument::VisitFPImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPDataProcessing1Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPDataProcessing2Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPDataProcessing3Source(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPIntegerConvert(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitFPFixedPointConvert(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("FP DP"); - counter->Increment(); -} - - -void Instrument::VisitCrypto2RegSHA(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Crypto"); - counter->Increment(); -} - - -void Instrument::VisitCrypto3RegSHA(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Crypto"); - counter->Increment(); -} - - -void Instrument::VisitCryptoAES(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Crypto"); - counter->Increment(); -} - - -void Instrument::VisitNEON2RegMisc(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3Same(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3SameFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3SameExtra(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEON3Different(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONAcrossLanes(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONByIndexedElement(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONCopy(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONExtract(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void 
Instrument::VisitNEONLoadStoreMultiStruct(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONLoadStoreMultiStructPostIndex( - const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONLoadStoreSingleStruct(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONLoadStoreSingleStructPostIndex( - const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONModifiedImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar2RegMisc(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3Diff(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3Same(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarByIndexedElement(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarCopy(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarPairwise(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONScalarShiftImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONShiftImmediate(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONTable(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitNEONPerm(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("NEON"); - counter->Increment(); -} - - -void Instrument::VisitReserved(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void Instrument::VisitUnallocated(const Instruction* instr) { - USE(instr); - Update(); - static Counter* counter = GetCounter("Other"); - counter->Increment(); -} - - -void 
Instrument::VisitUnimplemented(const Instruction* instr) {
-  USE(instr);
-  Update();
-  static Counter* counter = GetCounter("Other");
-  counter->Increment();
-}
-
-
-}  // namespace aarch64
-}  // namespace vixl
diff --git a/src/aarch64/instrument-aarch64.h b/src/aarch64/instrument-aarch64.h
deleted file mode 100644
index 4401b3ea..00000000
--- a/src/aarch64/instrument-aarch64.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2014, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef VIXL_AARCH64_INSTRUMENT_AARCH64_H_
-#define VIXL_AARCH64_INSTRUMENT_AARCH64_H_
-
-#include "../globals-vixl.h"
-#include "../utils-vixl.h"
-
-#include "constants-aarch64.h"
-#include "decoder-aarch64.h"
-#include "instrument-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-const int kCounterNameMaxLength = 256;
-const uint64_t kDefaultInstrumentationSamplingPeriod = 1 << 22;
-
-
-enum InstrumentState { InstrumentStateDisable = 0, InstrumentStateEnable = 1 };
-
-
-enum CounterType {
-  Gauge = 0,      // Gauge counters reset themselves after reading.
-  Cumulative = 1  // Cumulative counters keep their value after reading.
-};
-
-
-class Counter {
- public:
-  explicit Counter(const char* name, CounterType type = Gauge);
-
-  void Increment();
-  void Enable();
-  void Disable();
-  bool IsEnabled();
-  uint64_t GetCount();
-  VIXL_DEPRECATED("GetCount", uint64_t count()) { return GetCount(); }
-
-  const char* GetName();
-  VIXL_DEPRECATED("GetName", const char* name()) { return GetName(); }
-
-  CounterType GetType();
-  VIXL_DEPRECATED("GetType", CounterType type()) { return GetType(); }
-
- private:
-  char name_[kCounterNameMaxLength];
-  uint64_t count_;
-  bool enabled_;
-  CounterType type_;
-};
-
-
-class Instrument : public DecoderVisitor {
- public:
-  explicit Instrument(
-      const char* datafile = NULL,
-      uint64_t sample_period = kDefaultInstrumentationSamplingPeriod);
-  ~Instrument();
-
-  void Enable();
-  void Disable();
-
-// Declare all Visitor functions.
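// (For each entry in VISITOR_LIST, the macro below expands to one override
// declaration; the "AddSubImmediate" entry, for example, becomes
//   void VisitAddSubImmediate(const Instruction* instr) VIXL_OVERRIDE;
// matching the Instrument::VisitAddSubImmediate definition deleted above.)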
-#define DECLARE(A) void Visit##A(const Instruction* instr) VIXL_OVERRIDE; - VISITOR_LIST(DECLARE) -#undef DECLARE - - private: - void Update(); - void DumpCounters(); - void DumpCounterNames(); - void DumpEventMarker(unsigned marker); - void HandleInstrumentationEvent(unsigned event); - Counter* GetCounter(const char* name); - - void InstrumentLoadStore(const Instruction* instr); - void InstrumentLoadStorePair(const Instruction* instr); - - std::list<Counter*> counters_; - - FILE* output_stream_; - - // Counter information is dumped every sample_period_ instructions decoded. - // For a sample_period_ = 0 a final counter value is only produced when the - // Instrumentation class is destroyed. - uint64_t sample_period_; -}; - -} // namespace aarch64 -} // namespace vixl - -#endif // VIXL_AARCH64_INSTRUMENT_AARCH64_H_ diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc index e7ede2f9..cab02573 100644 --- a/src/aarch64/logic-aarch64.cc +++ b/src/aarch64/logic-aarch64.cc @@ -184,14 +184,28 @@ void Simulator::ld1(VectorFormat vform, } -void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { +void Simulator::ld1r(VectorFormat vform, + VectorFormat unpack_vform, + LogicVRegister dst, + uint64_t addr, + bool is_signed) { + unsigned unpack_size = LaneSizeInBitsFromFormat(unpack_vform); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { - dst.ReadUintFromMem(vform, i, addr); + if (is_signed) { + dst.ReadIntFromMem(vform, unpack_size, i, addr); + } else { + dst.ReadUintFromMem(vform, unpack_size, i, addr); + } } } +void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { + ld1r(vform, vform, dst, addr); +} + + void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, @@ -550,6 +564,7 @@ LogicVRegister Simulator::add(VectorFormat vform, const LogicVRegister& src2) { int lane_size = LaneSizeInBitsFromFormat(vform); dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { // Test for unsigned saturation. uint64_t ua = src1.UintLeftJustified(vform, i); @@ -568,12 +583,39 @@ LogicVRegister Simulator::add(VectorFormat vform, if ((pos_a == pos_b) && (pos_a != pos_r)) { dst.SetSignedSat(i, pos_a); } - dst.SetInt(vform, i, ur >> (64 - lane_size)); } return dst; } +LogicVRegister Simulator::add_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value) { + int lane_size = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(IsUintN(lane_size, value)); + dst.ClearForWrite(vform); + // Left-justify `value`. + uint64_t ub = value << (64 - lane_size); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + uint64_t ua = src1.UintLeftJustified(vform, i); + uint64_t ur = ua + ub; + if (ur < ua) { + dst.SetUnsignedSat(i, true); + } + + // Test for signed saturation. + // `value` is always positive, so we have an overflow if the (signed) result + // is smaller than the first operand. 
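+    // (Worked example of the left-justified trick, assuming B-sized lanes: a
+    // lane holding 0x7f is left-justified to 0x7f00000000000000; adding
+    // value == 1, left-justified to 0x0100000000000000, yields
+    // 0x8000000000000000, which is negative as an int64_t while the first
+    // operand was positive, so the lane saturates. The unsigned check above
+    // is analogous: 0xff + 1 wraps the left-justified sum below the first
+    // operand.)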
+ if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) { + dst.SetSignedSat(i, true); + } + + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, @@ -586,25 +628,68 @@ LogicVRegister Simulator::addp(VectorFormat vform, return dst; } +LogicVRegister Simulator::sdiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t val1 = src1.Int(vform, i); + int64_t val2 = src2.Int(vform, i); + int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt; + int64_t quotient = 0; + if ((val1 == min_int) && (val2 == -1)) { + quotient = min_int; + } else if (val2 != 0) { + quotient = val1 / val2; + } + dst.SetInt(vform, i, quotient); + } + + return dst; +} + +LogicVRegister Simulator::udiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t val1 = src1.Uint(vform, i); + uint64_t val2 = src2.Uint(vform, i); + uint64_t quotient = 0; + if (val2 != 0) { + quotient = val1 / val2; + } + dst.SetUint(vform, i, quotient); + } + + return dst; +} + LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); - add(vform, dst, dst, temp); + add(vform, dst, srca, temp); return dst; } LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); - sub(vform, dst, dst, temp); + sub(vform, dst, srca, temp); return dst; } @@ -614,6 +699,7 @@ LogicVRegister Simulator::mul(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); } @@ -632,6 +718,70 @@ LogicVRegister Simulator::mul(VectorFormat vform, } +LogicVRegister Simulator::smulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t dst_val; + int64_t val1 = src1.Int(vform, i); + int64_t val2 = src2.Int(vform, i); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + dst_val = internal::MultiplyHigh<8>(val1, val2); + break; + case 16: + dst_val = internal::MultiplyHigh<16>(val1, val2); + break; + case 32: + dst_val = internal::MultiplyHigh<32>(val1, val2); + break; + case 64: + dst_val = internal::MultiplyHigh<64>(val1, val2); + break; + default: + dst_val = 0xbadbeef; + VIXL_UNREACHABLE(); + break; + } + dst.SetInt(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::umulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t dst_val; + uint64_t val1 = src1.Uint(vform, i); + uint64_t val2 = src2.Uint(vform, i); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + dst_val = internal::MultiplyHigh<8>(val1, val2); + break; + case 16: + dst_val = internal::MultiplyHigh<16>(val1, val2); + break; + case 32: + dst_val = 
internal::MultiplyHigh<32>(val1, val2);
+        break;
+      case 64:
+        dst_val = internal::MultiplyHigh<64>(val1, val2);
+        break;
+      default:
+        dst_val = 0xbadbeef;
+        VIXL_UNREACHABLE();
+        break;
+    }
+    dst.SetUint(vform, i, dst_val);
+  }
+  return dst;
+}
+
+
 LogicVRegister Simulator::mla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
@@ -639,7 +789,7 @@ LogicVRegister Simulator::mla(VectorFormat vform,
                               int index) {
   SimVRegister temp;
   VectorFormat indexform = VectorFormatFillQ(vform);
-  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
 }
 
@@ -650,7 +800,7 @@
                               int index) {
   SimVRegister temp;
   VectorFormat indexform = VectorFormatFillQ(vform);
-  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
 }
 
@@ -898,8 +1048,14 @@ LogicVRegister Simulator::sdot(VectorFormat vform,
                                const LogicVRegister& src2,
                                int index) {
   SimVRegister temp;
-  VectorFormat indexform = VectorFormatFillQ(vform);
-  return sdot(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  // NEON indexed `dot` allows the index value to exceed the register size.
+  // Promote the format to a Q-sized vector format before the duplication.
+  dup_elements_to_segments(IsSVEFormat(vform) ? vform
+                                              : VectorFormatFillQ(vform),
+                           temp,
+                           src2,
+                           index);
+  return sdot(vform, dst, src1, temp);
 }
 
@@ -920,8 +1076,14 @@ LogicVRegister Simulator::udot(VectorFormat vform,
                                const LogicVRegister& src2,
                                int index) {
   SimVRegister temp;
-  VectorFormat indexform = VectorFormatFillQ(vform);
-  return udot(vform, dst, src1, dup_element(indexform, temp, src2, index));
+  // NEON indexed `dot` allows the index value to exceed the register size.
+  // Promote the format to a Q-sized vector format before the duplication.
+  dup_elements_to_segments(IsSVEFormat(vform) ? vform
+                                              : VectorFormatFillQ(vform),
+                           temp,
+                           src2,
+                           index);
+  return udot(vform, dst, src1, temp);
 }
 
@@ -1025,6 +1187,34 @@ LogicVRegister Simulator::sub(VectorFormat vform,
   return dst;
 }
 
+LogicVRegister Simulator::sub_uint(VectorFormat vform,
+                                   LogicVRegister dst,
+                                   const LogicVRegister& src1,
+                                   uint64_t value) {
+  int lane_size = LaneSizeInBitsFromFormat(vform);
+  VIXL_ASSERT(IsUintN(lane_size, value));
+  dst.ClearForWrite(vform);
+  // Left-justify `value`.
+  uint64_t ub = value << (64 - lane_size);
+  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+    // Test for unsigned saturation.
+    uint64_t ua = src1.UintLeftJustified(vform, i);
+    uint64_t ur = ua - ub;
+    if (ub > ua) {
+      dst.SetUnsignedSat(i, false);
+    }
+
+    // Test for signed saturation.
+    // `value` is always positive, so we have an overflow if the (signed) result
+    // is greater than the first operand.
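+    // (Mirror of the addition case, again assuming B-sized lanes: subtracting
+    // value == 1 from a lane holding 0x80 leaves 0x7f00000000000000, which is
+    // greater, as an int64_t, than the negative first operand, so the lane
+    // saturates toward the signed minimum; for a lane holding 0, ub > ua
+    // triggers the unsigned check above instead.)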
+ if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) { + dst.SetSignedSat(i, false); + } + + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, @@ -1091,12 +1281,12 @@ LogicVRegister Simulator::bic(VectorFormat vform, const LogicVRegister& src, uint64_t imm) { uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src.Uint(vform, i) & ~imm; } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -1298,10 +1488,13 @@ LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister Simulator::sminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max) { int64_t dst_val = max ? INT64_MIN : INT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + int64_t src_val = src.Int(vform, i); if (max) { dst_val = (src_val > dst_val) ? src_val : dst_val; @@ -1318,15 +1511,35 @@ LogicVRegister Simulator::sminmaxv(VectorFormat vform, LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - sminmaxv(vform, dst, src, true); + sminmaxv(vform, dst, GetPTrue(), src, true); + return dst; +} + + +LogicVRegister Simulator::sminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + sminmaxv(vform, dst, GetPTrue(), src, false); + return dst; +} + + +LogicVRegister Simulator::smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + sminmaxv(vform, dst, pg, src, true); return dst; } LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src) { - sminmaxv(vform, dst, src, false); + VIXL_ASSERT(IsSVEFormat(vform)); + sminmaxv(vform, dst, pg, src, false); return dst; } @@ -1414,10 +1627,13 @@ LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister Simulator::uminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max) { uint64_t dst_val = max ? 0 : UINT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + uint64_t src_val = src.Uint(vform, i); if (max) { dst_val = (src_val > dst_val) ? 
src_val : dst_val; @@ -1434,7 +1650,7 @@ LogicVRegister Simulator::uminmaxv(VectorFormat vform, LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uminmaxv(vform, dst, src, true); + uminmaxv(vform, dst, GetPTrue(), src, true); return dst; } @@ -1442,7 +1658,27 @@ LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uminmaxv(vform, dst, src, false); + uminmaxv(vform, dst, GetPTrue(), src, false); + return dst; +} + + +LogicVRegister Simulator::umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uminmaxv(vform, dst, pg, src, true); + return dst; +} + + +LogicVRegister Simulator::uminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uminmaxv(vform, dst, pg, src, false); return dst; } @@ -1521,14 +1757,104 @@ LogicVRegister Simulator::ushll2(VectorFormat vform, return ushl(vform, dst, extendedreg, shiftreg); } +std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform, + const LogicPRegister& pg, + const LogicVRegister& src, + int offset_from_last_active) { + // Untested for any other values. + VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1)); + + int last_active = GetLastActive(vform, pg); + int lane_count = LaneCountFromFormat(vform); + int index = + ((last_active + offset_from_last_active) + lane_count) % lane_count; + return std::make_pair(last_active >= 0, src.Uint(vform, index)); +} + +LogicVRegister Simulator::compact(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + int j = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (pg.IsActive(vform, i)) { + dst.SetUint(vform, j++, src.Uint(vform, i)); + } + } + for (; j < LaneCountFromFormat(vform); j++) { + dst.SetUint(vform, j, 0); + } + return dst; +} + +LogicVRegister Simulator::splice(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int lane_count = LaneCountFromFormat(vform); + int first_active = GetFirstActive(vform, pg); + int last_active = GetLastActive(vform, pg); + int dst_idx = 0; + uint64_t result[kZRegMaxSizeInBytes]; + + if (first_active >= 0) { + VIXL_ASSERT(last_active >= first_active); + VIXL_ASSERT(last_active < lane_count); + for (int i = first_active; i <= last_active; i++) { + result[dst_idx++] = src1.Uint(vform, i); + } + } + + VIXL_ASSERT(dst_idx <= lane_count); + for (int i = dst_idx; i < lane_count; i++) { + result[i] = src2.Uint(vform, i - dst_idx); + } + + for (int i = 0; i < lane_count; i++) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + +LogicVRegister Simulator::sel(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int p_reg_bits_per_lane = + LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit; + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane) + ? 
src1.Uint(vform, lane) + : src2.Uint(vform, lane); + dst.SetUint(vform, lane, lane_value); + } + return dst; +} + + +LogicPRegister Simulator::sel(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src1, + const LogicPRegister& src2) { + for (int i = 0; i < dst.GetChunkCount(); i++) { + LogicPRegister::ChunkType mask = pg.GetChunk(i); + LogicPRegister::ChunkType result = + (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i)); + dst.SetChunk(i, result); + } + return dst; +} + LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted = src_lane << shift; @@ -1577,10 +1903,10 @@ LogicVRegister Simulator::sri(VectorFormat vform, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); - int laneCount = LaneCountFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); VIXL_ASSERT((shift > 0) && (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); - for (int i = 0; i < laneCount; i++) { + for (int i = 0; i < lane_count; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted; @@ -1663,15 +1989,18 @@ LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { - result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -1681,38 +2010,51 @@ LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { - result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } +LogicVRegister Simulator::cnot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = (src.Uint(vform, i) == 0) ? 
1 : 0; + dst.SetUint(vform, i, value); + } + return dst; +} + + LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; i++) { - uint64_t value = src.Uint(vform, i); - result[i] = 0; - for (int j = 0; j < laneSizeInBits; j++) { - result[i] += (value & 1); - value >>= 1; - } + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -1896,11 +2238,108 @@ LogicVRegister Simulator::abs(VectorFormat vform, } +LogicVRegister Simulator::andv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform)); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result &= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::eorv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result ^= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::orv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result |= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::saddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize); + int64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + // The destination register always has D-lane sizes and the source register + // always has S-lanes or smaller, so signed integer overflow -- undefined + // behaviour -- can't occur. 
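+    // (Concretely: even the widest case, kZRegMaxSize == 2048 bits of S-sized
+    // lanes, sums at most 64 lanes of magnitude 2^31, so |result| stays below
+    // 2^37 and fits the int64_t accumulator easily.)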
+    result += src.Int(vform, i);
+  }
+
+  dst.ClearForWrite(kFormatD);
+  dst.SetInt(kFormatD, 0, result);
+  return dst;
+}
+
+
+LogicVRegister Simulator::uaddv(VectorFormat vform,
+                                LogicVRegister dst,
+                                const LogicPRegister& pg,
+                                const LogicVRegister& src) {
+  VIXL_ASSERT(IsSVEFormat(vform));
+  uint64_t result = 0;
+  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+    if (!pg.IsActive(vform, i)) continue;
+
+    result += src.Uint(vform, i);
+  }
+
+  dst.ClearForWrite(kFormatD);
+  dst.SetUint(kFormatD, 0, result);
+  return dst;
+}
+
+
 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
                                         LogicVRegister dst,
-                                        bool dstIsSigned,
+                                        bool dst_is_signed,
                                         const LogicVRegister& src,
-                                        bool srcIsSigned) {
+                                        bool src_is_signed) {
   bool upperhalf = false;
   VectorFormat srcform = kFormatUndefined;
   int64_t ssrc[8];
@@ -1969,7 +2408,7 @@
     }
 
     // Test for unsigned saturation
-    if (srcIsSigned) {
+    if (src_is_signed) {
       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
         dst.SetUnsignedSat(offset + i, true);
       } else if (ssrc[i] < 0) {
@@ -1982,13 +2421,13 @@
     }
 
     int64_t result;
-    if (srcIsSigned) {
+    if (src_is_signed) {
      result = ssrc[i] & MaxUintFromFormat(dstform);
    } else {
      result = usrc[i] & MaxUintFromFormat(dstform);
    }
 
-    if (dstIsSigned) {
+    if (dst_is_signed) {
      dst.SetInt(dstform, offset + i, result);
    } else {
      dst.SetUint(dstform, offset + i, result);
@@ -2030,17 +2469,17 @@ LogicVRegister Simulator::absdiff(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
-                                  bool issigned) {
+                                  bool is_signed) {
   dst.ClearForWrite(vform);
   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
-    if (issigned) {
-      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
-      sr = sr > 0 ? sr : -sr;
-      dst.SetInt(vform, i, sr);
+    bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
+                                  : (src1.Uint(vform, i) > src2.Uint(vform, i));
+    // Always calculate the answer using unsigned arithmetic, to avoid
+    // implementation-defined signed overflow.
+    if (src1_gt_src2) {
+      dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
    } else {
-      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
-      sr = sr > 0 ?
sr : -sr; - dst.SetUint(vform, i, sr); + dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i)); } } return dst; @@ -2085,15 +2524,15 @@ LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int laneSizeInBits = LaneSizeInBitsFromFormat(vform); + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); uint64_t reversed_value; uint64_t value; - for (int i = 0; i < laneCount; i++) { + for (int i = 0; i < lane_count; i++) { value = src.Uint(vform, i); reversed_value = 0; - for (int j = 0; j < laneSizeInBits; j++) { + for (int j = 0; j < lane_size_in_bits; j++) { reversed_value = (reversed_value << 1) | (value & 1); value >>= 1; } @@ -2101,7 +2540,7 @@ LogicVRegister Simulator::rbit(VectorFormat vform, } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2110,19 +2549,33 @@ LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src, - int revSize) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int laneSize = LaneSizeInBytesFromFormat(vform); - int lanesPerLoop = revSize / laneSize; - for (int i = 0; i < laneCount; i += lanesPerLoop) { - for (int j = 0; j < lanesPerLoop; j++) { - result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count / 2; i++) { + uint64_t t = src.Uint(vform, i); + dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1)); + dst.SetUint(vform, lane_count - i - 1, t); + } + return dst; +} + + +LogicVRegister Simulator::rev_byte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rev_size) { + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int lane_size = LaneSizeInBytesFromFormat(vform); + int lanes_per_loop = rev_size / lane_size; + for (int i = 0; i < lane_count; i += lanes_per_loop) { + for (int j = 0; j < lanes_per_loop; j++) { + result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j); } } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2132,21 +2585,21 @@ LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev(vform, dst, src, 2); + return rev_byte(vform, dst, src, 2); } LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev(vform, dst, src, 4); + return rev_byte(vform, dst, src, 4); } LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - return rev(vform, dst, src, 8); + return rev_byte(vform, dst, src, 8); } @@ -2215,22 +2668,60 @@ LogicVRegister Simulator::ext(VectorFormat vform, const LogicVRegister& src1, const LogicVRegister& src2, int index) { - uint8_t result[16]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount - index; ++i) { + uint8_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + for 
(int i = 0; i < lane_count - index; ++i) { result[i] = src1.Uint(vform, i + index); } for (int i = 0; i < index; ++i) { - result[laneCount - index + i] = src2.Uint(vform, i); + result[lane_count - index + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } template <typename T> +LogicVRegister Simulator::fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src) { + T result = acc.Float<T>(0); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result = FPAdd(result, src.Float<T>(i)); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + acc.ClearForWrite(vform_dst); + acc.SetFloat(0, result); + return acc; +} + +LogicVRegister Simulator::fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src) { + switch (LaneSizeInBitsFromFormat(vform)) { + case kHRegSize: + fadda<SimFloat16>(vform, acc, pg, src); + break; + case kSRegSize: + fadda<float>(vform, acc, pg, src); + break; + case kDRegSize: + fadda<double>(vform, acc, pg, src); + break; + default: + VIXL_UNREACHABLE(); + } + return acc; +} + +template <typename T> LogicVRegister Simulator::fcadd(VectorFormat vform, LogicVRegister dst, // d const LogicVRegister& src1, // n @@ -2273,7 +2764,7 @@ LogicVRegister Simulator::fcadd(VectorFormat vform, const LogicVRegister& src2, // m int rot) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - VIXL_UNIMPLEMENTED(); + fcadd<SimFloat16>(vform, dst, src1, src2, rot); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fcadd<float>(vform, dst, src1, src2, rot); } else { @@ -2283,12 +2774,12 @@ LogicVRegister Simulator::fcadd(VectorFormat vform, return dst; } - template <typename T> LogicVRegister Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, // d - const LogicVRegister& src1, // n - const LogicVRegister& src2, // m + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + const LogicVRegister& acc, int index, int rot) { int elements = LaneCountFromFormat(vform); @@ -2301,83 +2792,33 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) for (int e = 0; e <= (elements / 2) - 1; e++) { - switch (rot) { - case 0: - element1 = src2.Float<T>(index * 2); - element2 = src1.Float<T>(e * 2); - element3 = src2.Float<T>(index * 2 + 1); - element4 = src1.Float<T>(e * 2); - break; - case 90: - element1 = FPNeg(src2.Float<T>(index * 2 + 1)); - element2 = src1.Float<T>(e * 2 + 1); - element3 = src2.Float<T>(index * 2); - element4 = src1.Float<T>(e * 2 + 1); - break; - case 180: - element1 = FPNeg(src2.Float<T>(index * 2)); - element2 = src1.Float<T>(e * 2); - element3 = FPNeg(src2.Float<T>(index * 2 + 1)); - element4 = src1.Float<T>(e * 2); - break; - case 270: - element1 = src2.Float<T>(index * 2 + 1); - element2 = src1.Float<T>(e * 2 + 1); - element3 = FPNeg(src2.Float<T>(index * 2)); - element4 = src1.Float<T>(e * 2 + 1); - break; - default: - VIXL_UNREACHABLE(); - return dst; // prevents "element(n) may be unintialized" errors - } - dst.ClearForWrite(vform); - dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); - dst.SetFloat<T>(e * 2 + 1, - FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); - } - return dst; -} - - -template <typename T> -LogicVRegister 
Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, // d - const LogicVRegister& src1, // n - const LogicVRegister& src2, // m - int rot) { - int elements = LaneCountFromFormat(vform); - - T element1, element2, element3, element4; - rot *= 90; - - // Loop example: - // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) - // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) + // Index == -1 indicates a vector/vector rather than vector/indexed-element + // operation. + int f = (index < 0) ? e : index; - for (int e = 0; e <= (elements / 2) - 1; e++) { switch (rot) { case 0: - element1 = src2.Float<T>(e * 2); + element1 = src2.Float<T>(f * 2); element2 = src1.Float<T>(e * 2); - element3 = src2.Float<T>(e * 2 + 1); + element3 = src2.Float<T>(f * 2 + 1); element4 = src1.Float<T>(e * 2); break; case 90: - element1 = FPNeg(src2.Float<T>(e * 2 + 1)); + element1 = FPNeg(src2.Float<T>(f * 2 + 1)); element2 = src1.Float<T>(e * 2 + 1); - element3 = src2.Float<T>(e * 2); + element3 = src2.Float<T>(f * 2); element4 = src1.Float<T>(e * 2 + 1); break; case 180: - element1 = FPNeg(src2.Float<T>(e * 2)); + element1 = FPNeg(src2.Float<T>(f * 2)); element2 = src1.Float<T>(e * 2); - element3 = FPNeg(src2.Float<T>(e * 2 + 1)); + element3 = FPNeg(src2.Float<T>(f * 2 + 1)); element4 = src1.Float<T>(e * 2); break; case 270: - element1 = src2.Float<T>(e * 2 + 1); + element1 = src2.Float<T>(f * 2 + 1); element2 = src1.Float<T>(e * 2 + 1); - element3 = FPNeg(src2.Float<T>(e * 2)); + element3 = FPNeg(src2.Float<T>(f * 2)); element4 = src1.Float<T>(e * 2 + 1); break; default: @@ -2385,25 +2826,28 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, return dst; // prevents "element(n) may be unintialized" errors } dst.ClearForWrite(vform); - dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); - dst.SetFloat<T>(e * 2 + 1, - FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); + dst.SetFloat<T>(vform, + e * 2, + FPMulAdd(acc.Float<T>(e * 2), element2, element1)); + dst.SetFloat<T>(vform, + e * 2 + 1, + FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3)); } return dst; } - LogicVRegister Simulator::fcmla(VectorFormat vform, - LogicVRegister dst, // d - const LogicVRegister& src1, // n - const LogicVRegister& src2, // m + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + const LogicVRegister& acc, int rot) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - VIXL_UNIMPLEMENTED(); + fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fcmla<float>(vform, dst, src1, src2, rot); + fcmla<float>(vform, dst, src1, src2, acc, -1, rot); } else { - fcmla<double>(vform, dst, src1, src2, rot); + fcmla<double>(vform, dst, src1, src2, acc, -1, rot); } return dst; } @@ -2418,9 +2862,9 @@ LogicVRegister Simulator::fcmla(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { VIXL_UNIMPLEMENTED(); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fcmla<float>(vform, dst, src1, src2, index, rot); + fcmla<float>(vform, dst, src1, src2, dst, index, rot); } else { - fcmla<double>(vform, dst, src1, src2, index, rot); + fcmla<double>(vform, dst, src1, src2, dst, index, rot); } return dst; } @@ -2430,23 +2874,59 @@ LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index) { - int laneCount = LaneCountFromFormat(vform); - uint64_t value = src.Uint(vform, src_index); - 
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
- dst.SetUint(vform, i, value);
+ if (vform == kFormatVnQ) {
+ // When duplicating a 128-bit value, split it into two 64-bit parts, then
+ // copy both parts to their slots in the destination register.
+ uint64_t low = src.Uint(kFormatVnD, src_index * 2);
+ uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1);
+ dst.ClearForWrite(vform);
+ for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD);
+ d_lane += 2) {
+ dst.SetUint(kFormatVnD, d_lane, low);
+ dst.SetUint(kFormatVnD, d_lane + 1, high);
+ }
+ } else {
+ int lane_count = LaneCountFromFormat(vform);
+ uint64_t value = src.Uint(vform, src_index);
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < lane_count; ++i) {
+ dst.SetUint(vform, i, value);
+ }
}
return dst;
}
+LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int src_index) {
+ // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
+ // whereas in NEON, the size of a segment equals the size of the register
+ // itself.
+ int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
+ VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
+ int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
+
+ VIXL_ASSERT(src_index >= 0);
+ VIXL_ASSERT(src_index < lanes_per_segment);
+
+ dst.ClearForWrite(vform);
+ for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
+ uint64_t value = src.Uint(vform, j + src_index);
+ for (int i = 0; i < lanes_per_segment; i++) {
+ dst.SetUint(vform, j + i, value);
+ }
+ }
+ return dst;
+}
LogicVRegister Simulator::dup_immediate(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
- int laneCount = LaneCountFromFormat(vform);
+ int lane_count = LaneCountFromFormat(vform);
uint64_t value = imm & MaxUintFromFormat(vform);
dst.ClearForWrite(vform);
- for (int i = 0; i < laneCount; ++i) {
+ for (int i = 0; i < lane_count; ++i) {
dst.SetUint(vform, i, value);
}
return dst;
@@ -2473,12 +2953,93 @@ LogicVRegister Simulator::ins_immediate(VectorFormat vform,
}
+LogicVRegister Simulator::index(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t start,
+ uint64_t step) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ uint64_t value = start;
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst.SetUint(vform, i, value);
+ value += step;
+ }
+ return dst;
+}
+
+
+LogicVRegister Simulator::insr(VectorFormat vform,
+ LogicVRegister dst,
+ uint64_t imm) {
+ VIXL_ASSERT(IsSVEFormat(vform));
+ for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
+ dst.SetUint(vform, i, dst.Uint(vform, i - 1));
+ }
+ dst.SetUint(vform, 0, imm);
+ return dst;
+}
+
+
+LogicVRegister Simulator::mov(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ dst.ClearForWrite(vform);
+ for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ dst.SetUint(vform, lane, src.Uint(vform, lane));
+ }
+ return dst;
+}
+
+
+LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
+ // Avoid a copy if the registers already alias.
+ if (dst.Aliases(src)) return dst; + + for (int i = 0; i < dst.GetChunkCount(); i++) { + dst.SetChunk(i, src.GetChunk(i)); + } + return dst; +} + + +LogicVRegister Simulator::mov_merging(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src) { + return sel(vform, dst, pg, src, dst); +} + + +LogicVRegister Simulator::mov_zeroing(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src) { + SimVRegister zero; + dup_immediate(vform, zero, 0); + return sel(vform, dst, pg, src, zero); +} + + +LogicPRegister Simulator::mov_merging(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + return sel(dst, pg, src, dst); +} + + +LogicPRegister Simulator::mov_zeroing(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + SimPRegister all_false; + return sel(dst, pg, src, pfalse(all_false)); +} + + LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, uint64_t imm) { - int laneCount = LaneCountFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, imm); } return dst; @@ -2488,9 +3049,9 @@ LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm) { - int laneCount = LaneCountFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, ~imm); } return dst; @@ -2502,12 +3063,12 @@ LogicVRegister Simulator::orr(VectorFormat vform, const LogicVRegister& src, uint64_t imm) { uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src.Uint(vform, i) | imm; } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -2568,6 +3129,37 @@ LogicVRegister Simulator::sxtl2(VectorFormat vform, } +LogicVRegister Simulator::uxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits) { + int lane_count = LaneCountFromFormat(vform); + uint64_t mask = GetUintMask(from_size_in_bits); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetInt(vform, i, src.Uint(vform, i) & mask); + } + return dst; +} + + +LogicVRegister Simulator::sxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits) { + int lane_count = LaneCountFromFormat(vform); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + uint64_t value = + ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i)); + dst.SetInt(vform, i, value); + } + return dst; +} + + LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, @@ -2615,6 +3207,22 @@ LogicVRegister Simulator::rshrn2(VectorFormat vform, return extractnarrow(vformdst, dst, false, shifted_src, false); } +LogicVRegister Simulator::Table(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind) { + VIXL_ASSERT(IsSVEFormat(vform)); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { + uint64_t index = 
ind.Uint(vform, i); + uint64_t value = (index >= static_cast<uint64_t>(lane_count)) + ? 0 + : tab.Uint(vform, static_cast<int>(index)); + dst.SetUint(vform, i, value); + } + return dst; +} + LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, @@ -3182,7 +3790,7 @@ LogicVRegister Simulator::umlsl(VectorFormat vform, SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3194,7 +3802,7 @@ LogicVRegister Simulator::umlsl2(VectorFormat vform, SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3206,7 +3814,7 @@ LogicVRegister Simulator::smlsl(VectorFormat vform, SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3218,7 +3826,7 @@ LogicVRegister Simulator::smlsl2(VectorFormat vform, SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); - mls(vform, dst, temp1, temp2); + mls(vform, dst, dst, temp1, temp2); return dst; } @@ -3230,7 +3838,7 @@ LogicVRegister Simulator::umlal(VectorFormat vform, SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3242,7 +3850,7 @@ LogicVRegister Simulator::umlal2(VectorFormat vform, SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3254,7 +3862,7 @@ LogicVRegister Simulator::smlal(VectorFormat vform, SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3266,7 +3874,7 @@ LogicVRegister Simulator::smlal2(VectorFormat vform, SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); - mla(vform, dst, temp1, temp2); + mla(vform, dst, dst, temp1, temp2); return dst; } @@ -3371,7 +3979,7 @@ LogicVRegister Simulator::dot(VectorFormat vform, dst.ClearForWrite(vform); for (int e = 0; e < LaneCountFromFormat(vform); e++) { - int64_t result = 0; + uint64_t result = 0; int64_t element1, element2; for (int i = 0; i < 4; i++) { int index = 4 * e + i; @@ -3384,9 +3992,7 @@ LogicVRegister Simulator::dot(VectorFormat vform, } result += element1 * element2; } - - result += dst.Int(vform, e); - dst.SetInt(vform, e, result); + dst.SetUint(vform, e, result + dst.Uint(vform, e)); } return dst; } @@ -3564,16 +4170,16 @@ LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, 2 * i); result[(2 * i) + 1] = src2.Uint(vform, 2 * i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3584,16 +4190,16 @@ LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = 
LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, (2 * i) + 1); result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3604,16 +4210,16 @@ LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, i); result[(2 * i) + 1] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3624,16 +4230,16 @@ LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[16]; - int laneCount = LaneCountFromFormat(vform); - int pairs = laneCount / 2; + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, pairs + i); result[(2 * i) + 1] = src2.Uint(vform, pairs + i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[i]); } return dst; @@ -3644,15 +4250,15 @@ LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[32]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + uint64_t result[kZRegMaxSizeInBytes * 2]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src1.Uint(vform, i); - result[laneCount + i] = src2.Uint(vform, i); + result[lane_count + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[2 * i]); } return dst; @@ -3663,15 +4269,15 @@ LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { - uint64_t result[32]; - int laneCount = LaneCountFromFormat(vform); - for (int i = 0; i < laneCount; ++i) { + uint64_t result[kZRegMaxSizeInBytes * 2]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { result[i] = src1.Uint(vform, i); - result[laneCount + i] = src2.Uint(vform, i); + result[lane_count + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); - for (int i = 0; i < laneCount; ++i) { + for (int i = 0; i < lane_count; ++i) { dst.SetUint(vform, i, result[(2 * i) + 1]); } return dst; @@ -4201,7 +4807,7 @@ uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { } else { \ result = OP(op1, op2); \ } \ - dst.SetFloat(i, result); \ + dst.SetFloat(vform, i, result); \ } \ return dst; \ } \ @@ -4244,7 +4850,7 @@ LogicVRegister Simulator::frecps(VectorFormat vform, T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); T result = FPProcessNaNs(op1, op2); - dst.SetFloat(i, IsNaN(result) ? 
result : FPRecipStepFused(op1, op2)); + dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2)); } return dst; } @@ -4276,7 +4882,7 @@ LogicVRegister Simulator::frsqrts(VectorFormat vform, T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); T result = FPProcessNaNs(op1, op2); - dst.SetFloat(i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2)); + dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2)); } return dst; } @@ -4309,29 +4915,41 @@ LogicVRegister Simulator::fcmp(VectorFormat vform, bool result = false; T op1 = src1.Float<T>(i); T op2 = src2.Float<T>(i); - T nan_result = FPProcessNaNs(op1, op2); - if (!IsNaN(nan_result)) { - switch (cond) { - case eq: - result = (op1 == op2); - break; - case ge: - result = (op1 >= op2); - break; - case gt: - result = (op1 > op2); - break; - case le: - result = (op1 <= op2); - break; - case lt: - result = (op1 < op2); - break; - default: - VIXL_UNREACHABLE(); - break; - } + bool unordered = IsNaN(FPProcessNaNs(op1, op2)); + + switch (cond) { + case eq: + result = (op1 == op2); + break; + case ge: + result = (op1 >= op2); + break; + case gt: + result = (op1 > op2); + break; + case le: + result = (op1 <= op2); + break; + case lt: + result = (op1 < op2); + break; + case ne: + result = (op1 != op2); + break; + case uo: + result = unordered; + break; + default: + // Other conditions are defined in terms of those above. + VIXL_UNREACHABLE(); + break; + } + + if (result && unordered) { + // Only `uo` and `ne` can be true for unordered comparisons. + VIXL_ASSERT((cond == uo) || (cond == ne)); } + dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); } return dst; @@ -4403,15 +5021,16 @@ LogicVRegister Simulator::fabscmp(VectorFormat vform, template <typename T> LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = src1.Float<T>(i); T op2 = src2.Float<T>(i); - T acc = dst.Float<T>(i); + T acc = srca.Float<T>(i); T result = FPMulAdd(acc, op1, op2); - dst.SetFloat(i, result); + dst.SetFloat(vform, i, result); } return dst; } @@ -4419,15 +5038,16 @@ LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fmla<SimFloat16>(vform, dst, src1, src2); + fmla<SimFloat16>(vform, dst, srca, src1, src2); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fmla<float>(vform, dst, src1, src2); + fmla<float>(vform, dst, srca, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - fmla<double>(vform, dst, src1, src2); + fmla<double>(vform, dst, srca, src1, src2); } return dst; } @@ -4436,13 +5056,14 @@ LogicVRegister Simulator::fmla(VectorFormat vform, template <typename T> LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = -src1.Float<T>(i); T op2 = src2.Float<T>(i); - T acc = dst.Float<T>(i); + T acc = srca.Float<T>(i); T result = FPMulAdd(acc, op1, op2); dst.SetFloat(i, result); } @@ -4452,15 +5073,16 @@ LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister Simulator::fmls(VectorFormat 
vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - fmls<SimFloat16>(vform, dst, src1, src2); + fmls<SimFloat16>(vform, dst, srca, src1, src2); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - fmls<float>(vform, dst, src1, src2); + fmls<float>(vform, dst, srca, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - fmls<double>(vform, dst, src1, src2); + fmls<double>(vform, dst, srca, src1, src2); } return dst; } @@ -4740,75 +5362,131 @@ NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) #undef DEFINE_NEON_FP_PAIR_OP template <typename T> -LogicVRegister Simulator::fminmaxv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPMinMaxOp<T>::type Op) { - VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) || - (vform == kFormat4S)); - USE(vform); - T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1)); - T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3)); - if (vform == kFormat8H) { - T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5)); - T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7)); - result1 = (this->*Op)(result1, result3); - result2 = (this->*Op)(result2, result4); - } - T result = (this->*Op)(result1, result2); +LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<T>::type fn, + uint64_t inactive_value) { + int lane_count = LaneCountFromFormat(vform); + T result[kZRegMaxSizeInBytes / sizeof(T)]; + // Copy the source vector into a working array. Initialise the unused elements + // at the end of the array to the same value that a false predicate would set. + for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) { + result[i] = (i < lane_count) + ? src.Float<T>(i) + : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value); + } + + // Pairwise reduce the elements to a single value, using the pair op function + // argument. 
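+ // For example, with lane_count == 4 the reduction runs as:
+ //   step == 1: result[0] = fn(result[0], result[1]);
+ //              result[2] = fn(result[2], result[3]);
+ //   step == 2: result[0] = fn(result[0], result[2]);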
+ for (int step = 1; step < lane_count; step *= 2) { + for (int i = 0; i < lane_count; i += step * 2) { + result[i] = (this->*fn)(result[i], result[i + step]); + } + } dst.ClearForWrite(ScalarFormatFromFormat(vform)); - dst.SetFloat<T>(0, result); - return dst; + dst.SetFloat<T>(0, result[0]); + return dst; +} + +LogicVRegister Simulator::FPPairedAcrossHelper( + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<SimFloat16>::type fn16, + typename TFPPairOp<float>::type fn32, + typename TFPPairOp<double>::type fn64, + uint64_t inactive_value) { + switch (LaneSizeInBitsFromFormat(vform)) { + case kHRegSize: + return FPPairedAcrossHelper<SimFloat16>(vform, + dst, + src, + fn16, + inactive_value); + case kSRegSize: + return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value); + default: + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + return FPPairedAcrossHelper<double>(vform, + dst, + src, + fn64, + inactive_value); + } } +LogicVRegister Simulator::faddv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPAdd<SimFloat16>, + &Simulator::FPAdd<float>, + &Simulator::FPAdd<double>, + 0); +} LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = + FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMax<SimFloat16>, + &Simulator::FPMax<float>, + &Simulator::FPMax<double>, + inactive_value); } LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = + FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMin<SimFloat16>, + &Simulator::FPMin<float>, + &Simulator::FPMin<double>, + inactive_value); } LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, - dst, - src, - &Simulator::FPMaxNM<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMaxNM<SimFloat16>, + &Simulator::FPMaxNM<float>, + &Simulator::FPMaxNM<double>, + inactive_value); } LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - return fminmaxv<SimFloat16>(vform, - dst, - src, - &Simulator::FPMinNM<SimFloat16>); - } else { - return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>); - } + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = 
FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMinNM<SimFloat16>, + &Simulator::FPMinNM<float>, + &Simulator::FPMinNM<double>, + inactive_value); } @@ -4843,14 +5521,14 @@ LogicVRegister Simulator::fmla(VectorFormat vform, SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); - fmla<SimFloat16>(vform, dst, src1, index_reg); + fmla<SimFloat16>(vform, dst, dst, src1, index_reg); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); - fmla<float>(vform, dst, src1, index_reg); + fmla<float>(vform, dst, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); - fmla<double>(vform, dst, src1, index_reg); + fmla<double>(vform, dst, dst, src1, index_reg); } return dst; } @@ -4865,14 +5543,14 @@ LogicVRegister Simulator::fmls(VectorFormat vform, SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); - fmls<SimFloat16>(vform, dst, src1, index_reg); + fmls<SimFloat16>(vform, dst, dst, src1, index_reg); } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); - fmls<float>(vform, dst, src1, index_reg); + fmls<float>(vform, dst, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); - fmls<double>(vform, dst, src1, index_reg); + fmls<double>(vform, dst, dst, src1, index_reg); } return dst; } @@ -4941,62 +5619,142 @@ LogicVRegister Simulator::frint(VectorFormat vform, return dst; } +LogicVRegister Simulator::fcvt(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double dst_value = + RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits); + + uint64_t dst_raw_bits = + FPToRawbitsWithSize(dst_data_size_in_bits, dst_value); + + dst.SetUint(vform, i, dst_raw_bits); + } + + return dst; +} LogicVRegister Simulator::fcvts(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, - FPRounding rounding_mode, + FPRounding round, int fbits) { - dst.ClearForWrite(vform); - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - SimFloat16 op = - static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits); - dst.SetInt(vform, i, FPToInt16(op, rounding_mode)); - } - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - float op = src.Float<float>(i) * std::pow(2.0f, fbits); - dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); - } - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { - double op = src.Float<double>(i) * std::pow(2.0, fbits); - dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * + std::pow(2.0, fbits); + + switch (dst_data_size_in_bits) { + case kHRegSize: + dst.SetInt(vform, i, FPToInt16(result, round)); + break; + case kSRegSize: + dst.SetInt(vform, i, FPToInt32(result, round)); + break; + case kDRegSize: + dst.SetInt(vform, i, FPToInt64(result, round)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; } } + return dst; } +LogicVRegister Simulator::fcvts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round, + int fbits) { + dst.ClearForWrite(vform); + return fcvts(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} LogicVRegister Simulator::fcvtu(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, - FPRounding rounding_mode, + FPRounding round, int fbits) { - dst.ClearForWrite(vform); - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - SimFloat16 op = - static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits); - dst.SetUint(vform, i, FPToUInt16(op, rounding_mode)); - } - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - float op = src.Float<float>(i) * std::pow(2.0f, fbits); - dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); - } - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - for (int i = 0; i < LaneCountFromFormat(vform); i++) { - double op = src.Float<double>(i) * std::pow(2.0, fbits); - dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * + std::pow(2.0, fbits); + + switch (dst_data_size_in_bits) { + case kHRegSize: + dst.SetUint(vform, i, FPToUInt16(result, round)); + break; + case kSRegSize: + dst.SetUint(vform, i, FPToUInt32(result, round)); + break; + case kDRegSize: + dst.SetUint(vform, i, FPToUInt64(result, round)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; } } + return dst; } +LogicVRegister Simulator::fcvtu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round, + int fbits) { + dst.ClearForWrite(vform); + return fcvtu(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, @@ -5208,18 +5966,18 @@ LogicVRegister Simulator::frsqrte(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == 
kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { SimFloat16 input = src.Float<SimFloat16>(i); - dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input)); + dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input)); } } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float<float>(i); - dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); + dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float<double>(i); - dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); + dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input)); } } return dst; @@ -5354,18 +6112,18 @@ LogicVRegister Simulator::frecpe(VectorFormat vform, if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { SimFloat16 input = src.Float<SimFloat16>(i); - dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round)); + dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round)); } } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float<float>(i); - dst.SetFloat(i, FPRecipEstimate<float>(input, round)); + dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float<double>(i); - dst.SetFloat(i, FPRecipEstimate<double>(input, round)); + dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round)); } } return dst; @@ -5426,6 +6184,47 @@ LogicVRegister Simulator::urecpe(VectorFormat vform, return dst; } +LogicPRegister Simulator::pfalse(LogicPRegister dst) { + dst.Clear(); + return dst; +} + +LogicPRegister Simulator::pfirst(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + int first_pg = GetFirstActive(kFormatVnB, pg); + VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB)); + mov(dst, src); + if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true); + return dst; +} + +LogicPRegister Simulator::ptrue(VectorFormat vform, + LogicPRegister dst, + int pattern) { + int count = GetPredicateConstraintLaneCount(vform, pattern); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetActive(vform, i, i < count); + } + return dst; +} + +LogicPRegister Simulator::pnext(VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + int next = GetLastActive(vform, src) + 1; + while (next < LaneCountFromFormat(vform)) { + if (pg.IsActive(vform, next)) break; + next++; + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetActive(vform, i, (i == next)); + } + return dst; +} + template <typename T> LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, @@ -5477,49 +6276,1143 @@ LogicVRegister Simulator::frecpx(VectorFormat vform, return dst; } +LogicVRegister Simulator::ftsmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister maybe_neg_src1; + + // The bottom bit of src2 controls the sign of the result. Use it to + // conditionally invert the sign of one `fmul` operand. 
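+ // For example, for S-sized lanes the shift amount is 31, so bit 0 of each
+ // src2 lane lands in the sign bit position; the `eor` below then flips the
+ // sign of the corresponding src1 lane exactly when that bit was set.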
+ shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1); + eor(vform, maybe_neg_src1, maybe_neg_src1, src1); + + // Multiply src1 by the modified neg_src1, which is potentially its negation. + // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1, + // rather than neg_src1, must be the first source argument. + fmul(vform, dst, src1, maybe_neg_src1); + + return dst; +} + +LogicVRegister Simulator::ftssel(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + unsigned lane_bits = LaneSizeInBitsFromFormat(vform); + uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1); + uint64_t one; + + if (lane_bits == kHRegSize) { + one = Float16ToRawbits(Float16(1.0)); + } else if (lane_bits == kSRegSize) { + one = FloatToRawbits(1.0); + } else { + VIXL_ASSERT(lane_bits == kDRegSize); + one = DoubleToRawbits(1.0); + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Use integer accessors for this operation, as this is a data manipulation + // task requiring no calculation. + uint64_t op = src1.Uint(vform, i); + + // Only the bottom two bits of the src2 register are significant, indicating + // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1 + // determines the sign of the value written to dst. + uint64_t q = src2.Uint(vform, i); + if ((q & 1) == 1) op = one; + if ((q & 2) == 2) op ^= sign_bit; + + dst.SetUint(vform, i, op); + } + + return dst; +} + +template <typename T> +LogicVRegister Simulator::FTMaddHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + uint64_t coeff_pos, + uint64_t coeff_neg) { + SimVRegister zero; + dup_immediate(kFormatVnB, zero, 0); + + SimVRegister cf; + SimVRegister cfn; + dup_immediate(vform, cf, coeff_pos); + dup_immediate(vform, cfn, coeff_neg); + + // The specification requires testing the top bit of the raw value, rather + // than the sign of the floating point number, so use an integer comparison + // here. 
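+ // Interpreted as a signed integer, a lane compares less than zero exactly
+ // when its top bit is set, so -0.0 and NaNs with the sign bit set also
+ // select the negative coefficient.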
+ SimPRegister is_neg; + SVEIntCompareVectorsHelper(lt, + vform, + is_neg, + GetPTrue(), + src2, + zero, + false, + LeaveFlags); + mov_merging(vform, cf, is_neg, cfn); + + SimVRegister temp; + fabs_<T>(vform, temp, src2); + fmla<T>(vform, cf, cf, src1, temp); + mov(vform, dst, cf); + return dst; +} + + +LogicVRegister Simulator::ftmad(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + unsigned index) { + static const uint64_t ftmad_coeff16[] = {0x3c00, + 0xb155, + 0x2030, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x3c00, + 0xb800, + 0x293a, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000}; + + static const uint64_t ftmad_coeff32[] = {0x3f800000, + 0xbe2aaaab, + 0x3c088886, + 0xb95008b9, + 0x36369d6d, + 0x00000000, + 0x00000000, + 0x00000000, + 0x3f800000, + 0xbf000000, + 0x3d2aaaa6, + 0xbab60705, + 0x37cd37cc, + 0x00000000, + 0x00000000, + 0x00000000}; + + static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000, + 0xbfc5555555555543, + 0x3f8111111110f30c, + 0xbf2a01a019b92fc6, + 0x3ec71de351f3d22b, + 0xbe5ae5e2b60f7b91, + 0x3de5d8408868552f, + 0x0000000000000000, + 0x3ff0000000000000, + 0xbfe0000000000000, + 0x3fa5555555555536, + 0xbf56c16c16c13a0b, + 0x3efa01a019b1e8d8, + 0xbe927e4f7282f468, + 0x3e21ee96d2641b13, + 0xbda8f76380fbb401}; + VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64)); + VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64)); + VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64)); + + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + FTMaddHelper<SimFloat16>(vform, + dst, + src1, + src2, + ftmad_coeff16[index], + ftmad_coeff16[index + 8]); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + FTMaddHelper<float>(vform, + dst, + src1, + src2, + ftmad_coeff32[index], + ftmad_coeff32[index + 8]); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + FTMaddHelper<double>(vform, + dst, + src1, + src2, + ftmad_coeff64[index], + ftmad_coeff64[index + 8]); + } + return dst; +} + +LogicVRegister Simulator::fexpa(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045, + 0x005d, 0x0075, 0x008e, 0x00a8, + 0x00c2, 0x00dc, 0x00f8, 0x0114, + 0x0130, 0x014d, 0x016b, 0x0189, + 0x01a8, 0x01c8, 0x01e8, 0x0209, + 0x022b, 0x024e, 0x0271, 0x0295, + 0x02ba, 0x02e0, 0x0306, 0x032e, + 0x0356, 0x037f, 0x03a9, 0x03d4}; + + static const uint64_t fexpa_coeff32[] = + {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, + 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, + 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, + 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, + 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, + 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, + 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3, + 0x7d3e0c}; + + static const uint64_t fexpa_coeff64[] = + {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8, + 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0, + 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6, + 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b, + 0X306fe0a31b715, 0X33c08b26416ff, 
0X371a7373aa9cb, 0X3a7db34e59ff7, + 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0, + 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da, + 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225, + 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9, + 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed, + 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50, + 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf, + 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2, + 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c, + 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6, + 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8}; + + unsigned lane_size = LaneSizeInBitsFromFormat(vform); + int index_highbit = 5; + int op_highbit, op_shift; + const uint64_t* fexpa_coeff; + + if (lane_size == kHRegSize) { + index_highbit = 4; + VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff16; + op_highbit = 9; + op_shift = 10; + } else if (lane_size == kSRegSize) { + VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff32; + op_highbit = 13; + op_shift = 23; + } else { + VIXL_ASSERT(lane_size == kDRegSize); + VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff64; + op_highbit = 16; + op_shift = 52; + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op = src.Uint(vform, i); + uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)]; + result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift); + dst.SetUint(vform, i, result); + } + return dst; +} + +template <typename T> +LogicVRegister Simulator::fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + T two = T(2.0); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T s1 = src1.Float<T>(i); + if (!IsNaN(s1)) { + int64_t scale = src2.Int(vform, i); + // TODO: this is a low-performance implementation, but it's simple and + // less likely to be buggy. Consider replacing it with something faster. + + // Scales outside of these bounds become infinity or zero, so there's no + // point iterating further. + scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048); + + // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and + // decrement scale until it's zero. + while (scale-- > 0) { + s1 = FPMul(s1, two); + } + + // If scale is negative, divide by two and increment scale until it's + // zero. Initially, scale is (src2 - 1), so we pre-increment. 
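+ // For example, if src2 held -2, the first loop leaves scale at -3, and
+ // this loop then divides twice (for scale values -2 and -1) before the
+ // test fails at zero.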
+ while (++scale < 0) { + s1 = FPDiv(s1, two); + } + } + dst.SetFloat<T>(i, s1); + } + return dst; +} + +LogicVRegister Simulator::fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fscale<SimFloat16>(vform, dst, src1, src2); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fscale<float>(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fscale<double>(vform, dst, src1, src2); + } + return dst; +} + +LogicVRegister Simulator::scvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + + switch (dst_data_size_in_bits) { + case kHRegSize: { + SimFloat16 result = FixedToFloat16(value, fbits, round); + dst.SetUint(vform, i, Float16ToRawbits(result)); + break; + } + case kSRegSize: { + float result = FixedToFloat(value, fbits, round); + dst.SetUint(vform, i, FloatToRawbits(result)); + break; + } + case kDRegSize: { + double result = FixedToDouble(value, fbits, round); + dst.SetUint(vform, i, DoubleToRawbits(result)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + return dst; +} + LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { + return scvtf(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::ucvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round); - dst.SetFloat<SimFloat16>(i, result); - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); - dst.SetFloat<float>(i, result); - } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); - dst.SetFloat<double>(i, result); + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + + switch (dst_data_size_in_bits) { + case kHRegSize: { + SimFloat16 result = UFixedToFloat16(value, fbits, round); + dst.SetUint(vform, i, Float16ToRawbits(result)); + break; + } + case kSRegSize: { + float result = UFixedToFloat(value, fbits, round); + dst.SetUint(vform, i, FloatToRawbits(result)); + break; + } + case kDRegSize: { + double result = UFixedToDouble(value, fbits, round); + dst.SetUint(vform, i, DoubleToRawbits(result)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; } } + return 
dst; } - LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { + return ucvtf(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::unpk(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + UnpackType unpack_type, + ExtendType extend_type) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + const int lane_count = LaneCountFromFormat(vform); + const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count; + + switch (extend_type) { + case kSignedExtend: { + int64_t result[kZRegMaxSizeInBytes]; + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Int(vform_half, i + src_start_lane); + } + for (int i = 0; i < lane_count; ++i) { + dst.SetInt(vform, i, result[i]); + } + break; + } + case kUnsignedExtend: { + uint64_t result[kZRegMaxSizeInBytes]; + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Uint(vform_half, i + src_start_lane); + } + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + break; + } + default: + VIXL_UNREACHABLE(); + } + return dst; +} + +LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond, + VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& mask, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements, + FlagsUpdate flags) { + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + bool result = false; + if (mask.IsActive(vform, lane)) { + int64_t op1 = 0xbadbeef; + int64_t op2 = 0xbadbeef; + int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize; + switch (cond) { + case eq: + case ge: + case gt: + case lt: + case le: + case ne: + op1 = src1.Int(vform, lane); + op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane) + : src2.Int(vform, lane); + break; + case hi: + case hs: + case ls: + case lo: + op1 = src1.Uint(vform, lane); + op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane) + : src2.Uint(vform, lane); + break; + default: + VIXL_UNREACHABLE(); + } + + switch (cond) { + case eq: + result = (op1 == op2); + break; + case ne: + result = (op1 != op2); + break; + case ge: + result = (op1 >= op2); + break; + case gt: + result = (op1 > op2); + break; + case le: + result = (op1 <= op2); + break; + case lt: + result = (op1 < op2); + break; + case hs: + result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2)); + break; + case hi: + result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2)); + break; + case ls: + result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2)); + break; + case lo: + result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2)); + break; + default: + VIXL_UNREACHABLE(); + } + } + dst.SetActive(vform, lane, result); + } + + if (flags == SetFlags) PredTest(vform, mask, dst); + + return dst; +} + +LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op, + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements) { + unsigned lane_size = LaneSizeInBitsFromFormat(vform); + VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform; + + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + int shift_src_lane = lane; + if (is_wide_elements) { + // If the shift amount comes from wide elements, select the D-sized lane + // which occupies the corresponding lanes of the value to be shifted. 
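+ // For example, with B-sized lanes, lanes 0 to 7 of src1 are all shifted
+ // by the amount held in D-sized lane 0 of src2.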
+ shift_src_lane = (lane * lane_size) / kDRegSize; + } + uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane); + + // Saturate shift_amount to the size of the lane that will be shifted. + if (shift_amount > lane_size) shift_amount = lane_size; + + uint64_t value = src1.Uint(vform, lane); + int64_t result = ShiftOperand(lane_size, + value, + shift_op, + static_cast<unsigned>(shift_amount)); + dst.SetUint(vform, lane, result); + } + + return dst; +} + +LogicVRegister Simulator::asrd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int shift) { + VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <= + LaneSizeInBitsFromFormat(vform))); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { - if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { - SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round); - dst.SetFloat<SimFloat16>(i, result); - } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { - float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); - dst.SetFloat<float>(i, result); + int64_t value = src1.Int(vform, i); + if (shift <= 63) { + if (value < 0) { + // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely + // cast to int64_t, and cannot cause signed overflow in the result. + value = value + GetUintMask(shift); + } + value = ShiftOperand(kDRegSize, value, ASR, shift); } else { - VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); - double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); - dst.SetFloat<double>(i, result); + value = 0; } + dst.SetInt(vform, i, value); } return dst; } +LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper( + LogicalOp logical_op, + VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + const LogicVRegister& zm) { + VIXL_ASSERT(IsSVEFormat(vform)); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op1 = zn.Uint(vform, i); + uint64_t op2 = zm.Uint(vform, i); + uint64_t result; + switch (logical_op) { + case AND: + result = op1 & op2; + break; + case BIC: + result = op1 & ~op2; + break; + case EOR: + result = op1 ^ op2; + break; + case ORR: + result = op1 | op2; + break; + default: + result = 0; + VIXL_UNIMPLEMENTED(); + } + zd.SetUint(vform, i, result); + } + + return zd; +} + +LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, + LogicPRegister pd, + const LogicPRegister& pn, + const LogicPRegister& pm) { + for (int i = 0; i < pn.GetChunkCount(); i++) { + LogicPRegister::ChunkType op1 = pn.GetChunk(i); + LogicPRegister::ChunkType op2 = pm.GetChunk(i); + LogicPRegister::ChunkType result; + switch (op) { + case ANDS_p_p_pp_z: + case AND_p_p_pp_z: + result = op1 & op2; + break; + case BICS_p_p_pp_z: + case BIC_p_p_pp_z: + result = op1 & ~op2; + break; + case EORS_p_p_pp_z: + case EOR_p_p_pp_z: + result = op1 ^ op2; + break; + case NANDS_p_p_pp_z: + case NAND_p_p_pp_z: + result = ~(op1 & op2); + break; + case NORS_p_p_pp_z: + case NOR_p_p_pp_z: + result = ~(op1 | op2); + break; + case ORNS_p_p_pp_z: + case ORN_p_p_pp_z: + result = op1 | ~op2; + break; + case ORRS_p_p_pp_z: + case ORR_p_p_pp_z: + result = op1 | op2; + break; + default: + result = 0; + VIXL_UNIMPLEMENTED(); + } + pd.SetChunk(i, result); + } + return pd; +} + +LogicVRegister Simulator::SVEBitwiseImmHelper( + SVEBitwiseLogicalWithImm_UnpredicatedOp op, + VectorFormat vform, + LogicVRegister zd, + uint64_t imm) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op1 = zd.Uint(vform, i); + uint64_t 
result;
+ switch (op) {
+ case AND_z_zi:
+ result = op1 & imm;
+ break;
+ case EOR_z_zi:
+ result = op1 ^ imm;
+ break;
+ case ORR_z_zi:
+ result = op1 | imm;
+ break;
+ default:
+ result = 0;
+ VIXL_UNIMPLEMENTED();
+ }
+ zd.SetUint(vform, i, result);
+ }
+
+ return zd;
+}
+
+void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
+ const LogicPRegister& pg,
+ unsigned zt_code,
+ const LogicSVEAddressVector& addr) {
+ VIXL_ASSERT(zt_code < kNumberOfZRegisters);
+
+ int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
+ int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
+ int msize_in_bytes = addr.GetMsizeInBytes();
+ int reg_count = addr.GetRegCount();
+
+ VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
+ VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
+
+ unsigned zt_codes[4] = {zt_code,
+ (zt_code + 1) % kNumberOfZRegisters,
+ (zt_code + 2) % kNumberOfZRegisters,
+ (zt_code + 3) % kNumberOfZRegisters};
+
+ LogicVRegister zt[4] = {
+ ReadVRegister(zt_codes[0]),
+ ReadVRegister(zt_codes[1]),
+ ReadVRegister(zt_codes[2]),
+ ReadVRegister(zt_codes[3]),
+ };
+
+ // For unpacked forms (e.g. `st1b { z0.h }, ...`), the upper parts of the lanes
+ // are ignored, so read the source register using the VectorFormat that
+ // corresponds with the storage format, and multiply the index accordingly.
+ VectorFormat unpack_vform =
+ SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
+ int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ if (!pg.IsActive(vform, i)) continue;
+
+ for (int r = 0; r < reg_count; r++) {
+ uint64_t element_address = addr.GetElementAddress(i, r);
+ zt[r].WriteUintToMem(unpack_vform, i << unpack_shift, element_address);
+ }
+ }
+
+ if (ShouldTraceWrites()) {
+ PrintRegisterFormat format = GetPrintRegisterFormat(vform);
+ if (esize_in_bytes_log2 == msize_in_bytes_log2) {
+ // Use an FP format where it's likely that we're accessing FP data.
+ format = GetPrintRegisterFormatTryFP(format);
+ }
+ // Stores don't represent a change to the source register's value, so only
+ // print the relevant part of the value.
+ format = GetPrintRegPartial(format); + + PrintZStructAccess(zt_code, + reg_count, + pg, + format, + msize_in_bytes, + "->", + addr); + } +} + +void Simulator::SVEStructuredLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + bool is_signed) { + int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); + int msize_in_bytes = addr.GetMsizeInBytes(); + int reg_count = addr.GetRegCount(); + + VIXL_ASSERT(zt_code < kNumberOfZRegisters); + VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); + VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); + + unsigned zt_codes[4] = {zt_code, + (zt_code + 1) % kNumberOfZRegisters, + (zt_code + 2) % kNumberOfZRegisters, + (zt_code + 3) % kNumberOfZRegisters}; + LogicVRegister zt[4] = { + ReadVRegister(zt_codes[0]), + ReadVRegister(zt_codes[1]), + ReadVRegister(zt_codes[2]), + ReadVRegister(zt_codes[3]), + }; + + VectorFormat unpack_vform = + SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + for (int r = 0; r < reg_count; r++) { + uint64_t element_address = addr.GetElementAddress(i, r); + + if (!pg.IsActive(vform, i)) { + zt[r].SetUint(vform, i, 0); + continue; + } + + if (is_signed) { + zt[r].ReadIntFromMem(vform, + LaneSizeInBitsFromFormat(unpack_vform), + i, + element_address); + + } else { + zt[r].ReadUintFromMem(vform, + LaneSizeInBitsFromFormat(unpack_vform), + i, + element_address); + } + } + } + + if (ShouldTraceVRegs()) { + PrintRegisterFormat format = GetPrintRegisterFormat(vform); + if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) { + // Use an FP format where it's likely that we're accessing FP data. 
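
SVEStructuredLoadHelper above zeroes lanes whose governing-predicate bit is clear, and sign- or zero-extends each loaded element from the memory size up to the vector lane size. A simplified, self-contained model of that per-lane behaviour, using plain arrays as stand-ins for the simulator's register and predicate types:

    #include <cstdint>

    // Model of a predicated ld1sb into .S lanes: inactive lanes are zeroed,
    // active lanes load one byte and sign-extend it to 32 bits.
    void Ld1sbModel(int32_t* z, const bool* pg, const int8_t* mem,
                    int lane_count) {
      for (int i = 0; i < lane_count; i++) {
        z[i] = pg[i] ? static_cast<int32_t>(mem[i]) : 0;
      }
    }
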
+ format = GetPrintRegisterFormatTryFP(format); + } + PrintZStructAccess(zt_code, + reg_count, + pg, + format, + msize_in_bytes, + "<-", + addr); + } +} + +LogicPRegister Simulator::brka(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn) { + bool break_ = false; + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + if (pg.IsActive(kFormatVnB, i)) { + pd.SetActive(kFormatVnB, i, !break_); + break_ |= pn.IsActive(kFormatVnB, i); + } + } + + return pd; +} + +LogicPRegister Simulator::brkb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn) { + bool break_ = false; + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + if (pg.IsActive(kFormatVnB, i)) { + break_ |= pn.IsActive(kFormatVnB, i); + pd.SetActive(kFormatVnB, i, !break_); + } + } + + return pd; +} + +LogicPRegister Simulator::brkn(LogicPRegister pdm, + const LogicPRegister& pg, + const LogicPRegister& pn) { + if (!IsLastActive(kFormatVnB, pg, pn)) { + pfalse(pdm); + } + return pdm; +} + +LogicPRegister Simulator::brkpa(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm) { + bool last_active = IsLastActive(kFormatVnB, pg, pn); + + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + bool active = false; + if (pg.IsActive(kFormatVnB, i)) { + active = last_active; + last_active = last_active && !pm.IsActive(kFormatVnB, i); + } + pd.SetActive(kFormatVnB, i, active); + } + + return pd; +} + +LogicPRegister Simulator::brkpb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm) { + bool last_active = IsLastActive(kFormatVnB, pg, pn); + + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + bool active = false; + if (pg.IsActive(kFormatVnB, i)) { + last_active = last_active && !pm.IsActive(kFormatVnB, i); + active = last_active; + } + pd.SetActive(kFormatVnB, i, active); + } + + return pd; +} + +void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + SVEFaultTolerantLoadType type, + bool is_signed) { + int esize_in_bytes = LaneSizeInBytesFromFormat(vform); + int msize_in_bits = addr.GetMsizeInBits(); + int msize_in_bytes = addr.GetMsizeInBytes(); + + VIXL_ASSERT(zt_code < kNumberOfZRegisters); + VIXL_ASSERT(esize_in_bytes >= msize_in_bytes); + VIXL_ASSERT(addr.GetRegCount() == 1); + + LogicVRegister zt = ReadVRegister(zt_code); + LogicPRegister ffr = ReadFFR(); + + // Non-faulting loads are allowed to fail arbitrarily. To stress user + // code, fail a random element in roughly one in eight full-vector loads. + uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_)); + int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = 0; + + if (pg.IsActive(vform, i)) { + uint64_t element_address = addr.GetElementAddress(i, 0); + + if (type == kSVEFirstFaultLoad) { + // First-faulting loads always load the first active element, regardless + // of FFR. The result will be discarded if its FFR lane is inactive, but + // it could still generate a fault. + value = Memory::Read(msize_in_bytes, element_address); + // All subsequent elements have non-fault semantics. 
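
The break-predicate helpers `brka` and `brkb` above differ only in whether the lane that triggers the break stays active itself: break-after (`brka`) updates the break flag after writing the destination lane, break-before (`brkb`) updates it first. A standalone model over plain bool arrays (not the simulator's predicate types):

    // Model of BRKA/BRKB: lanes are active until pn signals a break; with
    // break-after (BRKA) the signalling lane itself stays active, with
    // break-before (BRKB) it does not. Lanes where pg is inactive are
    // skipped, as in the helpers above.
    void BrkModel(bool* pd, const bool* pg, const bool* pn, int lane_count,
                  bool break_before) {
      bool broken = false;
      for (int i = 0; i < lane_count; i++) {
        if (!pg[i]) continue;
        if (break_before) broken = broken || pn[i];
        pd[i] = !broken;
        if (!break_before) broken = broken || pn[i];
      }
    }
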
+ type = kSVENonFaultLoad; + + } else if (ffr.IsActive(vform, i)) { + // Simulation of fault-tolerant loads relies on system calls, and is + // likely to be relatively slow, so we only actually perform the load if + // its FFR lane is active. + + bool can_read = (i < fake_fault_at_lane) && + CanReadMemory(element_address, msize_in_bytes); + if (can_read) { + value = Memory::Read(msize_in_bytes, element_address); + } else { + // Propagate the fault to the end of FFR. + for (int j = i; j < LaneCountFromFormat(vform); j++) { + ffr.SetActive(vform, j, false); + } + } + } + } + + // The architecture permits a few possible results for inactive FFR lanes + // (including those caused by a fault in this instruction). We choose to + // leave the register value unchanged (like merging predication) because + // no other input to this instruction can have the same behaviour. + // + // Note that this behaviour takes precedence over pg's zeroing predication. + + if (ffr.IsActive(vform, i)) { + int msb = msize_in_bits - 1; + if (is_signed) { + zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value)); + } else { + zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value)); + } + } + } + + if (ShouldTraceVRegs()) { + PrintRegisterFormat format = GetPrintRegisterFormat(vform); + if ((esize_in_bytes == msize_in_bytes) && !is_signed) { + // Use an FP format where it's likely that we're accessing FP data. + format = GetPrintRegisterFormatTryFP(format); + } + // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess + // expects a single mask, so combine the two predicates. + SimPRegister mask; + SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr); + PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr); + } +} + +void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, + VectorFormat vform, + SVEOffsetModifier mod) { + bool is_signed = instr->ExtractBit(14) == 0; + bool is_ff = instr->ExtractBit(13) == 1; + // Note that these instructions don't use the Dtype encoding. + int msize_in_bytes_log2 = instr->ExtractBits(24, 23); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + vform, + mod, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + if (is_ff) { + SVEFaultTolerantLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + kSVEFirstFaultLoad, + is_signed); + } else { + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); + } +} + +int Simulator::GetFirstActive(VectorFormat vform, + const LogicPRegister& pg) const { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (pg.IsActive(vform, i)) return i; + } + return -1; +} + +int Simulator::GetLastActive(VectorFormat vform, + const LogicPRegister& pg) const { + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + if (pg.IsActive(vform, i)) return i; + } + return -1; +} + +int Simulator::CountActiveLanes(VectorFormat vform, + const LogicPRegister& pg) const { + int count = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + count += pg.IsActive(vform, i) ? 1 : 0; + } + return count; +} + +int Simulator::CountActiveAndTrueLanes(VectorFormat vform, + const LogicPRegister& pg, + const LogicPRegister& pn) const { + int count = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 
1 : 0;
+  }
+  return count;
+}
+
+int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
+                                               int pattern) const {
+  VIXL_ASSERT(IsSVEFormat(vform));
+  int all = LaneCountFromFormat(vform);
+  VIXL_ASSERT(all > 0);
+
+  switch (pattern) {
+    case SVE_VL1:
+    case SVE_VL2:
+    case SVE_VL3:
+    case SVE_VL4:
+    case SVE_VL5:
+    case SVE_VL6:
+    case SVE_VL7:
+    case SVE_VL8:
+      // VL1-VL8 are encoded directly.
+      VIXL_STATIC_ASSERT(SVE_VL1 == 1);
+      VIXL_STATIC_ASSERT(SVE_VL8 == 8);
+      return (pattern <= all) ? pattern : 0;
+    case SVE_VL16:
+    case SVE_VL32:
+    case SVE_VL64:
+    case SVE_VL128:
+    case SVE_VL256: {
+      // VL16-VL256 are encoded as log2(N) + c.
+      int min = 16 << (pattern - SVE_VL16);
+      return (min <= all) ? min : 0;
+    }
+    // Special cases.
+    case SVE_POW2:
+      return 1 << HighestSetBitPosition(all);
+    case SVE_MUL4:
+      return all - (all % 4);
+    case SVE_MUL3:
+      return all - (all % 3);
+    case SVE_ALL:
+      return all;
+  }
+  // Unnamed cases architecturally return 0.
+  return 0;
+}
+
+uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
+  if (IsContiguous()) {
+    return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
+  }
+
+  VIXL_ASSERT(IsScatterGather());
+  VIXL_ASSERT(vector_ != NULL);
+
+  // For scatter-gather accesses, we need to extract the offset from vector_,
+  // and apply modifiers.
+
+  uint64_t offset = 0;
+  switch (vector_form_) {
+    case kFormatVnS:
+      offset = vector_->GetLane<uint32_t>(lane);
+      break;
+    case kFormatVnD:
+      offset = vector_->GetLane<uint64_t>(lane);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  switch (vector_mod_) {
+    case SVE_MUL_VL:
+      VIXL_UNIMPLEMENTED();
+      break;
+    case SVE_LSL:
+      // We apply the shift below. There's nothing to do here.
+      break;
+    case NO_SVE_OFFSET_MODIFIER:
+      VIXL_ASSERT(vector_shift_ == 0);
+      break;
+    case SVE_UXTW:
+      offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
+      break;
+    case SVE_SXTW:
+      offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
+      break;
+  }
+
+  return base_ + (offset << vector_shift_);
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index 85954fc9..56c6eaf6 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -65,7 +65,7 @@ LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
 
 void LiteralPool::Reset() {
-  std::vector<RawLiteral *>::iterator it, end;
+  std::vector<RawLiteral*>::iterator it, end;
   for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
     RawLiteral* literal = *it;
     if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
@@ -145,7 +145,7 @@ void LiteralPool::Emit(EmitOption option) {
   }
 
   // Now populate the literal pool.
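
As a worked example of GetPredicateConstraintLaneCount above, take a 256-bit vector of .S elements, so `all` is 8: SVE_VL7 returns 7, SVE_VL16 returns 0 (16 lanes do not fit), SVE_POW2 returns 8, SVE_MUL3 returns 6, and SVE_ALL returns 8. The special-case arms reduce to:

    // The "special" pattern cases from the helper above, for a vector
    // holding `all` elements.
    int Pow2Pattern(int all) {  // SVE_POW2: largest power of two <= all.
      int n = 1;
      while ((n << 1) <= all) n <<= 1;
      return n;
    }
    int Mul3Pattern(int all) { return all - (all % 3); }  // SVE_MUL3
    int Mul4Pattern(int all) { return all - (all % 4); }  // SVE_MUL4
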
-  std::vector<RawLiteral *>::iterator it, end;
+  std::vector<RawLiteral*>::iterator it, end;
   for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
     VIXL_ASSERT((*it)->IsUsed());
     masm_->place(*it);
@@ -321,11 +321,13 @@ MacroAssembler::MacroAssembler(PositionIndependentCodeOption pic)
       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
       sp_(sp),
       tmp_list_(ip0, ip1),
-      fptmp_list_(d31),
+      v_tmp_list_(d31),
+      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
       current_scratch_scope_(NULL),
       literal_pool_(this),
       veneer_pool_(this),
-      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+      recommended_checkpoint_(Pool::kNoCheckpointRequired),
+      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
   checkpoint_ = GetNextCheckPoint();
 #ifndef VIXL_DEBUG
   USE(allow_macro_instructions_);
 #endif
 }
@@ -342,11 +344,13 @@ MacroAssembler::MacroAssembler(size_t capacity,
       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
       sp_(sp),
       tmp_list_(ip0, ip1),
-      fptmp_list_(d31),
+      v_tmp_list_(d31),
+      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
       current_scratch_scope_(NULL),
       literal_pool_(this),
       veneer_pool_(this),
-      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+      recommended_checkpoint_(Pool::kNoCheckpointRequired),
+      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
   checkpoint_ = GetNextCheckPoint();
 }
@@ -361,11 +365,13 @@ MacroAssembler::MacroAssembler(byte* buffer,
       generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
       sp_(sp),
       tmp_list_(ip0, ip1),
-      fptmp_list_(d31),
+      v_tmp_list_(d31),
+      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
       current_scratch_scope_(NULL),
       literal_pool_(this),
       veneer_pool_(this),
-      recommended_checkpoint_(Pool::kNoCheckpointRequired) {
+      recommended_checkpoint_(Pool::kNoCheckpointRequired),
+      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
   checkpoint_ = GetNextCheckPoint();
 }
@@ -819,6 +825,12 @@ void MacroAssembler::LogicalMacro(const Register& rd,
   // * 1 instruction to move to sp
   MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);
+  // Use `rd` as a temp, if we can.
+  temps.Include(rd);
+  // We read `rn` after evaluating `operand`.
+  temps.Exclude(rn);
+  // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
+  // because we don't need it after it is evaluated.
 
   if (operand.IsImmediate()) {
     uint64_t immediate = operand.GetImmediate();
@@ -886,6 +898,7 @@ } else {
        // Immediate can't be encoded: synthesize using move immediate.
        Register temp = temps.AcquireSameSizeAs(rn);
+       VIXL_ASSERT(!temp.Aliases(rn));
 
        // If the left-hand input is the stack pointer, we can't pre-shift the
        // immediate, as the encoding won't allow the subsequent post shift.
@@ -910,8 +923,8 @@
        operand.GetRegister().Is64Bits() ||
        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
 
-    temps.Exclude(operand.GetRegister());
     Register temp = temps.AcquireSameSizeAs(rn);
+    VIXL_ASSERT(!temp.Aliases(rn));
     EmitExtendShift(temp,
                     operand.GetRegister(),
                     operand.GetExtend(),
@@ -1139,17 +1152,13 @@ void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
     // Call the macro assembler for generic immediates.
     Mvn(rd, operand.GetImmediate());
   } else if (operand.IsExtendedRegister()) {
-    UseScratchRegisterScope temps(this);
-    temps.Exclude(operand.GetRegister());
-
     // Emit two instructions for the extend case. This differs from Mov, as
     // the extend and invert can't be achieved in one instruction.
-    Register temp = temps.AcquireSameSizeAs(rd);
-    EmitExtendShift(temp,
+    EmitExtendShift(rd,
                     operand.GetRegister(),
                     operand.GetExtend(),
                     operand.GetShiftAmount());
-    mvn(rd, Operand(temp));
+    mvn(rd, rd);
   } else {
     // Otherwise, register and shifted register cases can be handled by the
     // assembler directly, using orn.
@@ -1418,12 +1427,15 @@ void MacroAssembler::Add(const Register& rd,
                          const Operand& operand,
                          FlagsUpdate S) {
   VIXL_ASSERT(allow_macro_instructions_);
-  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
-      IsImmAddSub(-operand.GetImmediate())) {
-    AddSubMacro(rd, rn, -operand.GetImmediate(), S, SUB);
-  } else {
-    AddSubMacro(rd, rn, operand, S, ADD);
+  if (operand.IsImmediate()) {
+    int64_t imm = operand.GetImmediate();
+    if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
+        IsImmAddSub(-imm)) {
+      AddSubMacro(rd, rn, -imm, S, SUB);
+      return;
+    }
   }
+  AddSubMacro(rd, rn, operand, S, ADD);
 }
@@ -1439,12 +1451,15 @@ void MacroAssembler::Sub(const Register& rd,
                          const Operand& operand,
                          FlagsUpdate S) {
   VIXL_ASSERT(allow_macro_instructions_);
-  if (operand.IsImmediate() && (operand.GetImmediate() < 0) &&
-      IsImmAddSub(-operand.GetImmediate())) {
-    AddSubMacro(rd, rn, -operand.GetImmediate(), S, ADD);
-  } else {
-    AddSubMacro(rd, rn, operand, S, SUB);
+  if (operand.IsImmediate()) {
+    int64_t imm = operand.GetImmediate();
+    if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
+        IsImmAddSub(-imm)) {
+      AddSubMacro(rd, rn, -imm, S, ADD);
+      return;
+    }
   }
+  AddSubMacro(rd, rn, operand, S, SUB);
 }
@@ -1757,6 +1772,12 @@ void MacroAssembler::AddSubMacro(const Register& rd,
       (rn.IsZero() && !operand.IsShiftedRegister()) ||
       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
     UseScratchRegisterScope temps(this);
+    // Use `rd` as a temp, if we can.
+    temps.Include(rd);
+    // We read `rn` after evaluating `operand`.
+    temps.Exclude(rn);
+    // It doesn't matter if `operand` is in `temps` (e.g. because it aliases
+    // `rd`) because we don't need it after it is evaluated.
     Register temp = temps.AcquireSameSizeAs(rn);
     if (operand.IsImmediate()) {
       PreShiftImmMode mode = kAnyShift;
@@ -1842,6 +1863,12 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
   // * 1 instruction for add/sub
   MacroEmissionCheckScope guard(this);
   UseScratchRegisterScope temps(this);
+  // Use `rd` as a temp, if we can.
+  temps.Include(rd);
+  // We read `rn` after evaluating `operand`.
+  temps.Exclude(rn);
+  // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
+  // because we don't need it after it is evaluated.
 
   if (operand.IsImmediate() ||
       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
@@ -1856,7 +1883,6 @@
     VIXL_ASSERT(
         IsUintN(rd.GetSizeInBits() == kXRegSize ?
kXRegSizeLog2 : kWRegSizeLog2, operand.GetShiftAmount())); - temps.Exclude(operand.GetRegister()); Register temp = temps.AcquireSameSizeAs(rn); EmitShift(temp, operand.GetRegister(), @@ -1872,7 +1898,6 @@ void MacroAssembler::AddSubWithCarryMacro(const Register& rd, VIXL_ASSERT( operand.GetRegister().Is64Bits() || ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX))); - temps.Exclude(operand.GetRegister()); Register temp = temps.AcquireSameSizeAs(rn); EmitExtendShift(temp, operand.GetRegister(), @@ -2397,7 +2422,8 @@ void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op, // We do not handle pre-indexing or post-indexing. VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex())); VIXL_ASSERT(!registers.Overlaps(tmp_list_)); - VIXL_ASSERT(!registers.Overlaps(fptmp_list_)); + VIXL_ASSERT(!registers.Overlaps(v_tmp_list_)); + VIXL_ASSERT(!registers.Overlaps(p_tmp_list_)); VIXL_ASSERT(!registers.IncludesAliasOf(sp)); UseScratchRegisterScope temps(this); @@ -2481,7 +2507,7 @@ void MacroAssembler::BumpSystemStackPointer(const Operand& space) { } -// TODO(all): Fix printf for NEON registers. +// TODO(all): Fix printf for NEON and SVE registers. // This is the main Printf implementation. All callee-saved registers are // preserved, but NZCV and the caller-saved registers may be clobbered. @@ -2764,32 +2790,6 @@ void MacroAssembler::Log(TraceParameters parameters) { } -void MacroAssembler::EnableInstrumentation() { - VIXL_ASSERT(!isprint(InstrumentStateEnable)); - ExactAssemblyScope scope(this, kInstructionSize); - movn(xzr, InstrumentStateEnable); -} - - -void MacroAssembler::DisableInstrumentation() { - VIXL_ASSERT(!isprint(InstrumentStateDisable)); - ExactAssemblyScope scope(this, kInstructionSize); - movn(xzr, InstrumentStateDisable); -} - - -void MacroAssembler::AnnotateInstrumentation(const char* marker_name) { - VIXL_ASSERT(strlen(marker_name) == 2); - - // We allow only printable characters in the marker names. Unprintable - // characters are reserved for controlling features of the instrumentation. 
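
The reworked Add and Sub macros earlier in this file only rewrite a negative immediate as the opposite operation when the negation is safe; the `imm != std::numeric_limits<int64_t>::min()` test exists because -INT64_MIN is not representable in int64_t, so negating it would overflow. The guard in isolation:

    #include <cstdint>
    #include <limits>

    // True if `Add(rd, rn, imm)` may be rewritten as `Sub(rd, rn, -imm)` (and
    // vice versa): the immediate must be negative and safely negatable. The
    // macros additionally require IsImmAddSub(-imm) to hold.
    bool CanUseNegatedImmediate(int64_t imm) {
      return (imm < 0) && (imm != std::numeric_limits<int64_t>::min());
    }
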
-  VIXL_ASSERT(isprint(marker_name[0]) && isprint(marker_name[1]));
-
-  ExactAssemblyScope scope(this, kInstructionSize);
-  movn(xzr, (marker_name[1] << 8) | marker_name[0]);
-}
-
-
 void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) {
   ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode);
 }
@@ -2870,10 +2870,13 @@ void UseScratchRegisterScope::Open(MacroAssembler* masm) {
   CPURegList* available = masm->GetScratchRegisterList();
   CPURegList* available_v = masm->GetScratchVRegisterList();
+  CPURegList* available_p = masm->GetScratchPRegisterList();
   old_available_ = available->GetList();
   old_available_v_ = available_v->GetList();
+  old_available_p_ = available_p->GetList();
   VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
   VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister);
+  VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister);
 
   parent_ = masm->GetCurrentScratchRegisterScope();
   masm->SetCurrentScratchRegisterScope(this);
@@ -2891,6 +2894,7 @@ void UseScratchRegisterScope::Close() {
   masm_->GetScratchRegisterList()->SetList(old_available_);
   masm_->GetScratchVRegisterList()->SetList(old_available_v_);
+  masm_->GetScratchPRegisterList()->SetList(old_available_p_);
 
   masm_ = NULL;
 }
@@ -2899,44 +2903,46 @@ bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
   return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
-         masm_->GetScratchVRegisterList()->IncludesAliasOf(reg);
+         masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) ||
+         masm_->GetScratchPRegisterList()->IncludesAliasOf(reg);
 }
 
-
 Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
-  int code = AcquireNextAvailable(masm_->GetScratchRegisterList()).GetCode();
+  int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode();
   return Register(code, size_in_bits);
 }
 
 VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
-  int code = AcquireNextAvailable(masm_->GetScratchVRegisterList()).GetCode();
+  int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode();
   return VRegister(code, size_in_bits);
 }
 
 void UseScratchRegisterScope::Release(const CPURegister& reg) {
   VIXL_ASSERT(masm_ != NULL);
-  if (reg.IsRegister()) {
-    ReleaseByCode(masm_->GetScratchRegisterList(), reg.GetCode());
-  } else if (reg.IsVRegister()) {
-    ReleaseByCode(masm_->GetScratchVRegisterList(), reg.GetCode());
-  } else {
-    VIXL_ASSERT(reg.IsNone());
-  }
+
+  // Release(NoReg) has no effect.
+  if (reg.IsNone()) return;
+
+  ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode());
 }
 
 void UseScratchRegisterScope::Include(const CPURegList& list) {
   VIXL_ASSERT(masm_ != NULL);
+
+  // Including an empty list has no effect.
+  if (list.IsEmpty()) return;
+  VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister);
+
+  RegList reg_list = list.GetList();
   if (list.GetType() == CPURegister::kRegister) {
     // Make sure that neither sp nor xzr are included in the list.
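
For context, here is how the scratch-register machinery being extended above is typically used: within a scope, temporaries are borrowed from the masm's scratch lists and handed back automatically. A usage sketch (a hypothetical snippet built from the public VIXL names shown in this diff):

    void Example(vixl::aarch64::MacroAssembler* masm) {
      vixl::aarch64::UseScratchRegisterScope temps(masm);
      // Borrow a 64-bit scratch register from the masm's scratch list.
      vixl::aarch64::Register scratch = temps.AcquireX();
      masm->Mov(scratch, 42);
    }  // `scratch` is returned to the scratch list when `temps` closes.
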
- IncludeByRegList(masm_->GetScratchRegisterList(), - list.GetList() & ~(xzr.GetBit() | sp.GetBit())); - } else { - VIXL_ASSERT(list.GetType() == CPURegister::kVRegister); - IncludeByRegList(masm_->GetScratchVRegisterList(), list.GetList()); + reg_list &= ~(xzr.GetBit() | sp.GetBit()); } + + IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list); } @@ -2964,13 +2970,43 @@ void UseScratchRegisterScope::Include(const VRegister& reg1, } -void UseScratchRegisterScope::Exclude(const CPURegList& list) { - if (list.GetType() == CPURegister::kRegister) { - ExcludeByRegList(masm_->GetScratchRegisterList(), list.GetList()); - } else { - VIXL_ASSERT(list.GetType() == CPURegister::kVRegister); - ExcludeByRegList(masm_->GetScratchVRegisterList(), list.GetList()); +void UseScratchRegisterScope::Include(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + RegList include = 0; + RegList include_v = 0; + RegList include_p = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4}; + + for (size_t i = 0; i < ArrayLength(regs); i++) { + RegList bit = regs[i].GetBit(); + switch (regs[i].GetBank()) { + case CPURegister::kNoRegisterBank: + // Include(NoReg) has no effect. + VIXL_ASSERT(regs[i].IsNone()); + break; + case CPURegister::kRRegisterBank: + include |= bit; + break; + case CPURegister::kVRegisterBank: + include_v |= bit; + break; + case CPURegister::kPRegisterBank: + include_p |= bit; + break; + } } + + IncludeByRegList(masm_->GetScratchRegisterList(), include); + IncludeByRegList(masm_->GetScratchVRegisterList(), include_v); + IncludeByRegList(masm_->GetScratchPRegisterList(), include_p); +} + + +void UseScratchRegisterScope::Exclude(const CPURegList& list) { + ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList()); } @@ -2988,9 +3024,9 @@ void UseScratchRegisterScope::Exclude(const VRegister& reg1, const VRegister& reg2, const VRegister& reg3, const VRegister& reg4) { - RegList excludefp = + RegList exclude_v = reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit(); - ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp); + ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v); } @@ -2999,22 +3035,33 @@ void UseScratchRegisterScope::Exclude(const CPURegister& reg1, const CPURegister& reg3, const CPURegister& reg4) { RegList exclude = 0; - RegList excludefp = 0; + RegList exclude_v = 0; + RegList exclude_p = 0; const CPURegister regs[] = {reg1, reg2, reg3, reg4}; for (size_t i = 0; i < ArrayLength(regs); i++) { - if (regs[i].IsRegister()) { - exclude |= regs[i].GetBit(); - } else if (regs[i].IsVRegister()) { - excludefp |= regs[i].GetBit(); - } else { - VIXL_ASSERT(regs[i].IsNone()); + RegList bit = regs[i].GetBit(); + switch (regs[i].GetBank()) { + case CPURegister::kNoRegisterBank: + // Exclude(NoReg) has no effect. 
+        VIXL_ASSERT(regs[i].IsNone());
+        break;
+      case CPURegister::kRRegisterBank:
+        exclude |= bit;
+        break;
+      case CPURegister::kVRegisterBank:
+        exclude_v |= bit;
+        break;
+      case CPURegister::kPRegisterBank:
+        exclude_p |= bit;
+        break;
     }
   }
 
   ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
-  ExcludeByRegList(masm_->GetScratchVRegisterList(), excludefp);
+  ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
+  ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p);
 }
@@ -3023,13 +3070,15 @@ void UseScratchRegisterScope::ExcludeAll() {
                    masm_->GetScratchRegisterList()->GetList());
   ExcludeByRegList(masm_->GetScratchVRegisterList(),
                    masm_->GetScratchVRegisterList()->GetList());
+  ExcludeByRegList(masm_->GetScratchPRegisterList(),
+                   masm_->GetScratchPRegisterList()->GetList());
 }
 
-CPURegister UseScratchRegisterScope::AcquireNextAvailable(
-    CPURegList* available) {
-  VIXL_CHECK(!available->IsEmpty());
-  CPURegister result = available->PopLowestIndex();
+CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available,
+                                                 RegList mask) {
+  VIXL_CHECK((available->GetList() & mask) != 0);
+  CPURegister result = available->PopLowestIndex(mask);
   VIXL_ASSERT(!AreAliased(result, xzr, sp));
   return result;
 }
@@ -3057,5 +3106,22 @@ void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
   available->SetList(available->GetList() & ~exclude);
 }
 
+CPURegList* UseScratchRegisterScope::GetAvailableListFor(
+    CPURegister::RegisterBank bank) {
+  switch (bank) {
+    case CPURegister::kNoRegisterBank:
+      return NULL;
+    case CPURegister::kRRegisterBank:
+      return masm_->GetScratchRegisterList();
+    case CPURegister::kVRegisterBank:
+      return masm_->GetScratchVRegisterList();
+    case CPURegister::kPRegisterBank:
+      return masm_->GetScratchPRegisterList();
+  }
+  VIXL_UNREACHABLE();
+  return NULL;
+}
+
 }  // namespace aarch64
 }  // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 31db8dab..8becddbb 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -35,7 +35,6 @@
 #include "../macro-assembler-interface.h"
 
 #include "assembler-aarch64.h"
-#include "instrument-aarch64.h"
 
 // Required for runtime call support.
 // TODO: Break this dependency. We should be able to separate out the necessary
 // parts so that we don't need to include the whole simulator header.
@@ -61,7 +60,7 @@
 #define LSPAIR_MACRO_LIST(V)                             \
   V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2))  \
   V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \
-  V(Ldpsw, CPURegister&, rt, rt2, LDPSW_x)
+  V(Ldpsw, Register&, rt, rt2, LDPSW_x)
 
 namespace vixl {
 namespace aarch64 {
@@ -528,6 +527,57 @@ class MacroEmissionCheckScope : public EmissionCheckScope {
 };
 
+// This scope simplifies the handling of the SVE `movprfx` instruction.
+//
+// If dst.Aliases(src):
+// - Start an ExactAssemblyScope(masm, kInstructionSize).
+// Otherwise:
+// - Start an ExactAssemblyScope(masm, 2 * kInstructionSize).
+// - Generate a suitable `movprfx` instruction.
+//
+// In both cases, the ExactAssemblyScope is left with enough remaining space for
+// exactly one destructive instruction.
+class MovprfxHelperScope : public ExactAssemblyScope {
+ public:
+  inline MovprfxHelperScope(MacroAssembler* masm,
+                            const ZRegister& dst,
+                            const ZRegister& src);
+
+  inline MovprfxHelperScope(MacroAssembler* masm,
+                            const ZRegister& dst,
+                            const PRegister& pg,
+                            const ZRegister& src);
+
+  // TODO: Implement constructors that examine _all_ sources. If `dst` aliases
+  // any other source register, we can't use `movprfx`. This isn't obviously
+  // useful, but the MacroAssembler should not generate invalid code for it.
+  // Valid behaviour can be implemented using `mov`.
+  //
+  // The best way to handle this in an instruction-agnostic way is probably to
+  // use variadic templates.
+
+ private:
+  inline bool ShouldGenerateMovprfx(const ZRegister& dst,
+                                    const ZRegister& src) {
+    VIXL_ASSERT(AreSameLaneSize(dst, src));
+    return !dst.Aliases(src);
+  }
+
+  inline bool ShouldGenerateMovprfx(const ZRegister& dst,
+                                    const PRegister& pg,
+                                    const ZRegister& src) {
+    VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing());
+    // We need to emit movprfx in two cases:
+    //  1. To give a predicated merging unary instruction zeroing predication.
+    //  2. To make destructive instructions constructive.
+    //
+    // There are no predicated zeroing instructions that can take movprfx, so we
+    // will never generate an unnecessary movprfx with this logic.
+    return pg.IsZeroing() || ShouldGenerateMovprfx(dst, src);
+  }
+};
+
+
 enum BranchType {
   // Copies of architectural conditions.
   // The associated conditions can be used in place of those, the code will
@@ -566,7 +616,19 @@ enum BranchType {
   kBranchTypeFirstCondition = eq,
   kBranchTypeLastCondition = nv,
   kBranchTypeFirstUsingReg = reg_zero,
-  kBranchTypeFirstUsingBit = reg_bit_clear
+  kBranchTypeFirstUsingBit = reg_bit_clear,
+
+  // SVE branch conditions.
+  integer_none = eq,
+  integer_any = ne,
+  integer_nlast = cs,
+  integer_last = cc,
+  integer_first = mi,
+  integer_nfrst = pl,
+  integer_pmore = hi,
+  integer_plast = ls,
+  integer_tcont = ge,
+  integer_tstop = lt
 };
@@ -587,6 +649,18 @@ enum PreShiftImmMode {
   kAnyShift  // Allow any pre-shift.
 };
 
+enum FPMacroNaNPropagationOption {
+  // The default option. This generates a run-time error in macros that respect
+  // this option.
+  NoFPMacroNaNPropagationSelected,
+  // For example, Fmin(result, NaN(a), NaN(b)) always selects NaN(a) if both
+  // NaN(a) and NaN(b) are quiet, or both are signalling, at the
+  // cost of extra code generation in some cases.
+  StrictNaNPropagation,
+  // For example, Fmin(result, NaN(a), NaN(b)) selects either NaN, but using the
+  // fewest instructions.
+  FastNaNPropagation
+};
 
 class MacroAssembler : public Assembler, public MacroAssemblerInterface {
  public:
@@ -946,6 +1020,20 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
   void Claim(const Operand& size);
   void Drop(const Operand& size);
 
+  // As above, but for multiples of the SVE vector length.
+  void ClaimVL(int64_t multiplier) {
+    // We never need to worry about sp alignment because the VL is always a
+    // multiple of 16.
+    VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0);
+    VIXL_ASSERT(multiplier >= 0);
+    Addvl(sp, sp, -multiplier);
+  }
+  void DropVL(int64_t multiplier) {
+    VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0);
+    VIXL_ASSERT(multiplier >= 0);
+    Addvl(sp, sp, multiplier);
+  }
+
   // Preserve the callee-saved registers (as defined by AAPCS64).
   //
   // Higher-numbered registers are pushed before lower-numbered registers, and
@@ -1489,13 +1577,8 @@
   void Fmov(const VRegister& vd, const VRegister& vn) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    // Only emit an instruction if vd and vn are different, and they are both D
-    // registers. fmov(s0, s0) is not a no-op because it clears the top word of
-    // d0.
Technically, fmov(d0, d0) is not a no-op either because it clears - // the top of q0, but VRegister does not currently support Q registers. - if (!vd.Is(vn) || !vd.Is64Bits()) { - fmov(vd, vn); - } + // TODO: Use DiscardMoveMode to allow this move to be elided if vd.Is(vn). + fmov(vd, vn); } void Fmov(const VRegister& vd, const Register& rn) { VIXL_ASSERT(allow_macro_instructions_); @@ -1503,12 +1586,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { SingleEmissionCheckScope guard(this); fmov(vd, rn); } - void Fmov(const VRegister& vd, const XRegister& xn) { - Fmov(vd, Register(xn)); - } - void Fmov(const VRegister& vd, const WRegister& wn) { - Fmov(vd, Register(wn)); - } void Fmov(const VRegister& vd, int index, const Register& rn) { VIXL_ASSERT(allow_macro_instructions_); SingleEmissionCheckScope guard(this); @@ -2970,6 +3047,43 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { NEON_2VREG_SHIFT_LONG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) #undef DEFINE_MACRO_ASM_FUNC +// SVE 3 vector register instructions. +#define SVE_3VREG_COMMUTATIVE_MACRO_LIST(V) \ + V(add, Add) \ + V(and_, And) \ + V(bic, Bic) \ + V(eor, Eor) \ + V(mul, Mul) \ + V(orr, Orr) \ + V(sabd, Sabd) \ + V(smax, Smax) \ + V(smulh, Smulh) \ + V(smin, Smin) \ + V(uabd, Uabd) \ + V(umax, Umax) \ + V(umin, Umin) \ + V(umulh, Umulh) + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const ZRegister& zd, \ + const PRegisterM& pg, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + if (zd.Aliases(zn)) { \ + SingleEmissionCheckScope guard(this); \ + ASM(zd, pg, zd, zm); \ + } else if (zd.Aliases(zm)) { \ + SingleEmissionCheckScope guard(this); \ + ASM(zd, pg, zd, zn); \ + } else { \ + MovprfxHelperScope guard(this, zd, pg, zn); \ + ASM(zd, pg, zd, zm); \ + } \ + } + SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) { VIXL_ASSERT(allow_macro_instructions_); SingleEmissionCheckScope guard(this); @@ -3357,6 +3471,2901 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { crc32cx(rd, rn, rm); } + // Scalable Vector Extensions. + void Abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + abs(zd, pg, zn); + } + void Add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + add(zd, zn, zm); + } + void Add(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubHelper(kAddImmediate, zd, zn, imm); + } + void Addpl(const Register& xd, const Register& xn, int64_t multiplier); + void Addvl(const Register& xd, const Register& xn, int64_t multiplier); + // Note that unlike the core ISA, SVE's `adr` is not PC-relative. 
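
To make the `adr` note above concrete: the SVE form computes a vector of addresses from two vector operands, lane by lane, with an optional left shift applied to the offsets; the PC is never an input. A per-lane model of the .D form, using plain arrays rather than VIXL types:

    #include <cstdint>

    // Model of `adr zd.d, [zn.d, zm.d, lsl #shift]`:
    // each result lane is base[i] + (offset[i] << shift).
    void SveAdrModel(uint64_t* zd, const uint64_t* zn, const uint64_t* zm,
                     int lane_count, int shift) {
      for (int i = 0; i < lane_count; i++) {
        zd[i] = zn[i] + (zm[i] << shift);
      }
    }
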
+ void Adr(const ZRegister& zd, const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + adr(zd, addr); + } + void And(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + and_(pd, pg, pn, pm); + } + void And(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + and_(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. + VIXL_UNIMPLEMENTED(); + } + } + void And(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + and_(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ands(pd, pg, pn, pm); + } + void Andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + andv(vd, pg, zn); + } + void Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + asr(zd, pg, zd, shift); + } + void Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Asr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + asr(zd, zn, shift); + } + void Asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + asr(zd, zn, zm); + } + void Asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + asrd(zd, pg, zd, shift); + } + void Bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bic(pd, pg, pn, pm); + } + void Bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + bic(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + bic(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. 
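
Several of the SVE macros here (And and Bic above, Eon and Eor below) are gated on `IsImmLogical(imm, lane_size)`, reaching the `VIXL_UNIMPLEMENTED()` fallback when the immediate cannot be encoded. Roughly, a logical immediate is a bit pattern that repeats with some power-of-two period, where one period is a rotation of a single contiguous run of ones (all-zeros and all-ones excluded). A simplified 64-bit checker under that characterisation, not VIXL's actual implementation:

    #include <cstdint>

    static int PopCount64(uint64_t v) {
      int n = 0;
      for (; v != 0; v &= v - 1) n++;
      return n;
    }

    // Rough model of the logical-immediate test: some period of 2..64 bits
    // must replicate across the value, and that period must be a rotation of
    // a single contiguous run of ones.
    bool LooksLikeLogicalImmediate(uint64_t imm) {
      if ((imm == 0) || (imm == ~UINT64_C(0))) return false;
      for (int size = 2; size <= 64; size *= 2) {
        uint64_t mask =
            (size == 64) ? ~UINT64_C(0) : ((UINT64_C(1) << size) - 1);
        uint64_t pattern = imm & mask;
        bool replicates = true;
        for (int i = size; i < 64; i += size) {
          if (((imm >> i) & mask) != pattern) replicates = false;
        }
        if (!replicates || (pattern == 0) || (pattern == mask)) continue;
        // A single circular run of ones has exactly two 0<->1 transitions.
        uint64_t rotated = ((pattern >> 1) | (pattern << (size - 1))) & mask;
        if (PopCount64(pattern ^ rotated) == 2) return true;
      }
      return false;
    }
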
+ VIXL_UNIMPLEMENTED(); + } + } + void Bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bics(pd, pg, pn, pm); + } + void Brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brka(pd, pg, pn); + } + void Brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkas(pd, pg, pn); + } + void Brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkb(pd, pg, pn); + } + void Brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkbs(pd, pg, pn); + } + void Brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + if (!pd.Aliases(pm)) { + Mov(pd, pm); + } + SingleEmissionCheckScope guard(this); + brkn(pd, pg, pn, pd); + } + void Brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + if (!pd.Aliases(pm)) { + Mov(pd, pm); + } + SingleEmissionCheckScope guard(this); + brkns(pd, pg, pn, pd); + } + void Brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpa(pd, pg, pn, pm); + } + void Brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpas(pd, pg, pn, pm); + } + void Brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpb(pd, pg, pn, pm); + } + void Brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpbs(pd, pg, pn, pm); + } + void Clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clasta(rd, pg, rn, zm); + } + void Clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clasta(vd, pg, vn, zm); + } + void Clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + void Clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clastb(rd, pg, rn, zm); + } + void Clastb(const VRegister& vd, + const PRegister& pg, 
+ const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clastb(vd, pg, vn, zm); + } + void Clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + void Cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cls(zd, pg, zn); + } + void Clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clz(zd, pg, zn); + } + void Cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpeq(pd, pg, zn, zm); + } + void Cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpeq(pd, pg, zn, imm5); + } else { + CompareHelper(eq, pd, pg, zn, imm); + } + } + void Cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpge(pd, pg, zn, zm); + } + void Cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpge(pd, pg, zn, imm5); + } else { + CompareHelper(ge, pd, pg, zn, imm); + } + } + void Cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpgt(pd, pg, zn, zm); + } + void Cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpgt(pd, pg, zn, imm5); + } else { + CompareHelper(gt, pd, pg, zn, imm); + } + } + void Cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmphi(pd, pg, zn, zm); + } + void Cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmphi(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(hi, pd, pg, zn, imm); + } + } + void Cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmphs(pd, pg, zn, zm); + } + void Cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmphs(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(hs, pd, pg, zn, imm); + } + } + void Cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const 
ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmple(pd, pg, zn, zm); + } + void Cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmple(pd, pg, zn, imm5); + } else { + CompareHelper(le, pd, pg, zn, imm); + } + } + void Cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmplo(pd, pg, zn, zm); + } + void Cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmplo(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(lo, pd, pg, zn, imm); + } + } + void Cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpls(pd, pg, zn, zm); + } + void Cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + if (imm.IsUintN(7)) { + SingleEmissionCheckScope guard(this); + cmpls(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7))); + } else { + CompareHelper(ls, pd, pg, zn, imm); + } + } + void Cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmplt(pd, pg, zn, zm); + } + void Cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmplt(pd, pg, zn, imm5); + } else { + CompareHelper(lt, pd, pg, zn, imm); + } + } + void Cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmpne(pd, pg, zn, zm); + } + void Cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + int imm5; + if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) { + SingleEmissionCheckScope guard(this); + cmpne(pd, pg, zn, imm5); + } else { + CompareHelper(ne, pd, pg, zn, imm); + } + } + void Cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cnot(zd, pg, zn); + } + void Cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cnt(zd, pg, zn); + } + void Cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cntb(rd, pattern, multiplier); + } + void Cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cntd(rd, pattern, multiplier); + } + void Cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + 
SingleEmissionCheckScope guard(this); + cnth(rd, pattern, multiplier); + } + void Cntp(const Register& rd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + // The `cntp` instruction architecturally takes an X register, but the + // result will always be in the range [0, kPRegMaxSize] (and therefore + // always fits in a W register), so we can accept a W-sized rd here. + cntp(rd.X(), pg, pn); + } + void Cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cntw(rd, pattern, multiplier); + } + void Compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + compact(zd, pg, zn); + } + void Cpy(const ZRegister& zd, const PRegister& pg, IntegerOperand imm); + void Cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpy(zd, pg, rn); + } + void Cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpy(zd, pg, vn); + } + void Ctermeq(const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ctermeq(rn, rm); + } + void Ctermne(const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ctermne(rn, rm); + } + void Decb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decb(rdn, pattern, multiplier); + } + void Decd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decd(rdn, pattern, multiplier); + } + void Decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decd(zdn, pattern, multiplier); + } + void Dech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dech(rdn, pattern, multiplier); + } + void Dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dech(zdn, pattern, multiplier); + } + void Decp(const Register& rdn, const PRegisterWithLaneSize& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decp(rdn, pg); + } + void Decp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `decp` writes every lane, so use an unpredicated movprfx. 
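
The `decp` comment above shows the MovprfxHelperScope policy (declared earlier in this diff, in macro-assembler-aarch64.h) in action: a `movprfx` is needed either to give a merging unary instruction zeroing predication, or to make a destructive instruction behave constructively, and `decp` sidesteps the predication question because it writes every lane. The decision reduces to:

    // Simplified form of MovprfxHelperScope's decision: emit `movprfx` when
    // zeroing predication must be synthesised, or when the destination does
    // not already alias the destructive operand.
    bool ShouldEmitMovprfx(bool pg_is_zeroing, bool dst_aliases_src) {
      return pg_is_zeroing || !dst_aliases_src;
    }
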
+ MovprfxHelperScope guard(this, zd, zn); + decp(zd, pg); + } + void Decp(const ZRegister& zdn, const PRegister& pg) { Decp(zdn, pg, zdn); } + void Decw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decw(rdn, pattern, multiplier); + } + void Decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decw(zdn, pattern, multiplier); + } + void Dup(const ZRegister& zd, const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(zd, xn); + } + void Dup(const ZRegister& zd, const ZRegister& zn, int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(zd, zn, index); + } + void Dup(const ZRegister& zd, IntegerOperand imm); + void Eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + eon(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. + VIXL_UNIMPLEMENTED(); + } + } + void Eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eor(pd, pg, pn, pm); + } + void Eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + eor(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. 
+ VIXL_UNIMPLEMENTED(); + } + } + void Eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + eor(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eors(pd, pg, pn, pm); + } + void Eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eorv(vd, pg, zn); + } + void Ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset); + void Fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fabs(zd, pg, zn); + } + void Facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facge(pd, pg, zn, zm); + } + void Facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facgt(pd, pg, zn, zm); + } + void Facle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facge(pd, pg, zm, zn); + } + void Faclt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facgt(pd, pg, zm, zn); + } + void Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fadd(zd, pg, zd, imm); + } + void Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fadd(zd, zn, zm); + } + void Fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fadda(vd, pg, vn, zm); + } + void Faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + faddv(vd, pg, zn); + } + void Fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmeq(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. 
+  void Fcmeq(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmeq(pd, pg, zn, zm);
+  }
+  void Fcmge(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmge(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmge(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmge(pd, pg, zn, zm);
+  }
+  void Fcmgt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmgt(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmgt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmgt(pd, pg, zn, zm);
+  }
+  void Fcmla(const ZRegister& zda,
+             const PRegisterM& pg,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             int rot) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zda, pg, zda);
+    fcmla(zda, pg, zn, zm, rot);
+  }
+  void Fcmla(const ZRegister& zda,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             int index,
+             int rot) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmla(zda, zn, zm, index, rot);
+  }
+  void Fcmle(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmle(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmle(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmge(pd, pg, zm, zn);
+  }
+  void Fcmlt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmlt(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Fcmlt(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmgt(pd, pg, zm, zn);
+  }
+  void Fcmne(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             double zero) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (zero == 0.0) {
+      fcmne(pd, pg, zn, zero);
+    } else {
+      // TODO: Synthesise other immediates.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
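The floating-point compare-with-immediate macros above only accept a literal 0.0, matching the sole immediate the SVE compare-to-zero encodings provide; other constants fall through to VIXL_UNIMPLEMENTED() for now. A usage sketch (illustrative only; registers are assumptions):

// Assumes the same include and using-directive as the first sketch above.
void FcmZeroSketch(MacroAssembler* masm) {
  masm->Fcmeq(p1.VnS(), p0.Zeroing(), z0.VnS(), 0.0);  // p1 = (z0 == 0.0)
  masm->Fcmgt(p2.VnS(), p0.Zeroing(), z0.VnS(), 0.0);  // p2 = (z0 > 0.0)
}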
+  void Fcmne(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmne(pd, pg, zn, zm);
+  }
+  void Fcmuo(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const ZRegister& zn,
+             const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcmuo(pd, pg, zn, zm);
+  }
+  void Fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
+  void Fcpy(const ZRegister& zd, const PRegisterM& pg, float imm);
+  void Fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm);
+  void Fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcvt(zd, pg, zn);
+  }
+  void Fcvt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    // The element type in this predicated movprfx is determined by the larger
+    // type between the source and destination.
+    int lane_size = std::max(zd.GetLaneSizeInBits(), zn.GetLaneSizeInBits());
+    MovprfxHelperScope guard(this,
+                             zd.WithLaneSize(lane_size),
+                             pg,
+                             zn.WithLaneSize(lane_size));
+    fcvt(zd, pg.Merging(), zn);
+  }
+  void Fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcvtzs(zd, pg, zn);
+  }
+  void Fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fcvtzu(zd, pg, zn);
+  }
+  void Fdiv(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Fdup(const ZRegister& zd, double imm);
+  void Fdup(const ZRegister& zd, float imm);
+  void Fdup(const ZRegister& zd, Float16 imm);
+  void Fexpa(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fexpa(zd, zn);
+  }
+  void Fmad(const ZRegister& zdn,
+            const PRegisterM& pg,
+            const ZRegister& zm,
+            const ZRegister& za) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmad(zdn, pg, zm, za);
+  }
+  void Fmax(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmax(zd, pg, zd, imm);
+  }
+  void Fmax(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fmaxnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmaxnm(zd, pg, zd, imm);
+  }
+  void Fmaxnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              const ZRegister& zm,
+              FPMacroNaNPropagationOption nan_option);
+  void Fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmaxnmv(vd, pg, zn);
+  }
+  void Fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmaxv(vd, pg, zn);
+  }
+  void Fmin(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmin(zd, pg, zd, imm);
+  }
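The zeroing Fcvt overload above is worth a second look: the movprfx must clear whichever of the source and destination lane widths is larger, which is why the helper rewrites both operands with the wider lane size before the merging conversion. A sketch (illustrative only; the expansion shown is the conceptual one when the registers differ):

// Assumes the same include and using-directive as the first sketch above.
void FcvtZeroingSketch(MacroAssembler* masm) {
  // S-to-H conversion: S is the wider lane, so conceptually this becomes
  //   movprfx z2.s, p0/z, z1.s
  //   fcvt    z2.h, p0/m, z1.s
  masm->Fcvt(z2.VnH(), p0.Zeroing(), z1.VnS());
}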
+  void Fmin(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fminnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fminnm(zd, pg, zd, imm);
+  }
+  void Fminnm(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              const ZRegister& zm,
+              FPMacroNaNPropagationOption nan_option);
+  void Fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fminnmv(vd, pg, zn);
+  }
+  void Fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fminv(vd, pg, zn);
+  }
+  // zd = za + (zn * zm)
+  void Fmla(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fmla(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            int index);
+  // zd = za - (zn * zm)
+  void Fmls(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fmls(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            int index);
+  void Fmov(const ZRegister& zd, double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fdup(zd, imm);
+  }
+  void Fmov(const ZRegister& zd, float imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fdup(zd, imm);
+  }
+  void Fmov(const ZRegister& zd, Float16 imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fdup(zd, imm);
+  }
+  void Fmov(const ZRegister& zd, const PRegisterM& pg, double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fcpy(zd, pg, imm);
+  }
+  void Fmov(const ZRegister& zd, const PRegisterM& pg, float imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fcpy(zd, pg, imm);
+  }
+  void Fmov(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fcpy(zd, pg, imm);
+  }
+  void Fmsb(const ZRegister& zdn,
+            const PRegisterM& pg,
+            const ZRegister& zm,
+            const ZRegister& za) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmsb(zdn, pg, zm, za);
+  }
+  void Fmul(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fmul(zd, pg, zd, imm);
+  }
+  void Fmul(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            FPMacroNaNPropagationOption nan_option);
+  void Fmul(const ZRegister& zd,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            unsigned index) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmul(zd, zn, zm, index);
+  }
+  void Fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmul(zd, zn, zm);
+  }
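Fmov on Z registers is pure delegation: the unpredicated forms forward to Fdup (broadcast a floating-point constant) and the predicated forms to Fcpy (copy the constant into active lanes). Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void FmovSketch(MacroAssembler* masm) {
  masm->Fmov(z0.VnD(), 1.0);                // same as Fdup(z0.VnD(), 1.0)
  masm->Fmov(z1.VnS(), p0.Merging(), 0.5);  // same as Fcpy(z1.VnS(), ..., 0.5)
}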
+  void Fmulx(const ZRegister& zd,
+             const PRegisterM& pg,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             FPMacroNaNPropagationOption nan_option);
+  void Fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fneg(zd, pg, zn);
+  }
+  void Fnmla(
+      const ZRegister& zda,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Fnmls(
+      const ZRegister& zd,
+      const PRegisterM& pg,
+      const ZRegister& za,
+      const ZRegister& zn,
+      const ZRegister& zm,
+      FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected);
+  void Frecpe(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frecpe(zd, zn);
+  }
+  void Frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frecps(zd, zn, zm);
+  }
+  void Frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frecpx(zd, pg, zn);
+  }
+  void Frecpx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frecpx(zd, pg.Merging(), zn);
+  }
+  void Frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frinta(zd, pg, zn);
+  }
+  void Frinta(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frinta(zd, pg.Merging(), zn);
+  }
+  void Frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frinti(zd, pg, zn);
+  }
+  void Frinti(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frinti(zd, pg.Merging(), zn);
+  }
+  void Frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintm(zd, pg, zn);
+  }
+  void Frintm(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintm(zd, pg.Merging(), zn);
+  }
+  void Frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintn(zd, pg, zn);
+  }
+  void Frintn(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintn(zd, pg.Merging(), zn);
+  }
+  void Frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintp(zd, pg, zn);
+  }
+  void Frintp(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintp(zd, pg.Merging(), zn);
+  }
+  void Frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintx(zd, pg, zn);
+  }
+  void Frintx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintx(zd, pg.Merging(), zn);
+  }
+  void Frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frintz(zd, pg, zn);
+  }
+  void Frintz(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    frintz(zd, pg.Merging(), zn);
+  }
+  void Frsqrte(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frsqrte(zd, zn);
+  }
+  void Frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    frsqrts(zd, zn, zm);
+  }
+  void Fscale(const ZRegister& zd,
+              const PRegisterM& pg,
+              const ZRegister& zn,
+              const ZRegister& zm);
+  void Fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fsqrt(zd, pg, zn);
+  }
+  void Fsqrt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fsqrt(zd, pg.Merging(), zn);
+  }
+  void Fsub(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fsub(zd, pg, zd, imm);
+  }
+  void Fsub(const ZRegister& zd,
+            const PRegisterM& pg,
+            double imm,
+            const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    fsubr(zd, pg, zd, imm);
+  }
+  void Fsub(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fsub(zd, zn, zm);
+  }
+  void Ftmad(const ZRegister& zd,
+             const ZRegister& zn,
+             const ZRegister& zm,
+             int imm3);
+  void Ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ftsmul(zd, zn, zm);
+  }
+  void Ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ftssel(zd, zn, zm);
+  }
+  void Incb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incb(rdn, pattern, multiplier);
+  }
+  void Incd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incd(rdn, pattern, multiplier);
+  }
+  void Incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incd(zdn, pattern, multiplier);
+  }
+  void Inch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    inch(rdn, pattern, multiplier);
+  }
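Note the Fsub overload above that takes (pg, imm, zn): it computes imm - zn by prefixing zn into zd and then using the reversed-operand fsubr, so no scratch register is needed. A sketch (illustrative only; like fadd, the fsub/fsubr immediate encodings accept only 0.5 and 1.0):

// Assumes the same include and using-directive as the first sketch above.
void FsubReversedSketch(MacroAssembler* masm) {
  masm->Fsub(z0.VnS(), p0.Merging(), 1.0, z1.VnS());  // z0 = 1.0 - z1
}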
+  void Inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    inch(zdn, pattern, multiplier);
+  }
+  void Incp(const Register& rdn, const PRegisterWithLaneSize& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incp(rdn, pg);
+  }
+  void Incp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameFormat(zd, zn));
+    // `incp` writes every lane, so use an unpredicated movprfx.
+    MovprfxHelperScope guard(this, zd, zn);
+    incp(zd, pg);
+  }
+  void Incp(const ZRegister& zdn, const PRegister& pg) { Incp(zdn, pg, zdn); }
+  void Incw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incw(rdn, pattern, multiplier);
+  }
+  void Incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    incw(zdn, pattern, multiplier);
+  }
+  void Index(const ZRegister& zd, const Operand& start, const Operand& step);
+  void Insr(const ZRegister& zdn, const Register& rm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    insr(zdn, rm);
+  }
+  void Insr(const ZRegister& zdn, const VRegister& vm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    insr(zdn, vm);
+  }
+  void Insr(const ZRegister& zdn, IntegerOperand imm);
+  void Lasta(const Register& rd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lasta(rd, pg, zn);
+  }
+  void Lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lasta(vd, pg, zn);
+  }
+  void Lastb(const Register& rd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lastb(rd, pg, zn);
+  }
+  void Lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lastb(vd, pg, zn);
+  }
+  void Ld1b(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1h(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1w(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1d(const ZRegister& zt,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr);
+  void Ld1rb(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rb,
+                              kBRegSizeInBytes);
+  }
+  void Ld1rh(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rh,
+                              kHRegSizeInBytes);
+  }
+  void Ld1rw(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rw,
+                              kSRegSizeInBytes);
+  }
+  void Ld1rd(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rd,
+                              kDRegSizeInBytes);
+  }
+  void Ld1rqb(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
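The Ld1r* wrappers above funnel through SVELoadBroadcastImmHelper, which checks the scaled immediate offset before emitting a load-and-replicate. Illustrative usage (registers and offset assumed):

// Assumes the same include and using-directive as the first sketch above.
void Ld1rSketch(MacroAssembler* masm) {
  // Load one 32-bit element from [x0 + 8] and broadcast it to every active
  // S lane; inactive lanes are zeroed.
  masm->Ld1rw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 8));
}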
+  void Ld1rqd(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ld1rqh(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ld1rqw(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ld1rsb(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rsb,
+                              kBRegSizeInBytes);
+  }
+  void Ld1rsh(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rsh,
+                              kHRegSizeInBytes);
+  }
+  void Ld1rsw(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadBroadcastImmHelper(zt,
+                              pg,
+                              addr,
+                              &MacroAssembler::ld1rsw,
+                              kSRegSizeInBytes);
+  }
+  void Ld1sb(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr);
+  void Ld1sh(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr);
+  void Ld1sw(const ZRegister& zt,
+             const PRegisterZ& pg,
+             const SVEMemOperand& addr);
+  void Ld2b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2b(zt1, zt2, pg, addr);
+  }
+  void Ld2h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2h(zt1, zt2, pg, addr);
+  }
+  void Ld2w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2w(zt1, zt2, pg, addr);
+  }
+  void Ld2d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld2d(zt1, zt2, pg, addr);
+  }
+  void Ld3b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3b(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld3h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3h(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld3w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3w(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld3d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld3d(zt1, zt2, zt3, pg, addr);
+  }
+  void Ld4b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4b(zt1, zt2, zt3, zt4, pg, addr);
+  }
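The Ld2/Ld3/Ld4 families load interleaved structures and de-interleave them across consecutive Z registers. For example (illustrative only; the pointer and registers are assumptions):

// Assumes the same include and using-directive as the first sketch above.
void Ld4Sketch(MacroAssembler* masm) {
  // For an array of 4-byte structs at x0: z0 receives member 0 of every
  // struct, z1 member 1, and so on.
  masm->Ld4b(z0.VnB(), z1.VnB(), z2.VnB(), z3.VnB(), p0.Zeroing(),
             SVEMemOperand(x0));
}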
+  void Ld4h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4h(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Ld4w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4w(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Ld4d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegisterZ& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ld4d(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Ldff1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldff1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr);
+  void Ldff1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr);
+  void Ldff1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr);
+  void Ldff1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1b(zt, pg, xn, zm);
+  }
+  void Ldff1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1b(zt, pg, zn, imm5);
+  }
+  void Ldff1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1d(zt, pg, xn, zm);
+  }
+  void Ldff1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1d(zt, pg, zn, imm5);
+  }
+  void Ldff1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1h(zt, pg, xn, zm);
+  }
+  void Ldff1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1h(zt, pg, zn, imm5);
+  }
+  void Ldff1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const Register& xn,
+               const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sb(zt, pg, xn, zm);
+  }
+  void Ldff1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const ZRegister& zn,
+               int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sb(zt, pg, zn, imm5);
+  }
+  void Ldff1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const Register& xn,
+               const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sh(zt, pg, xn, zm);
+  }
+  void Ldff1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const ZRegister& zn,
+               int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sh(zt, pg, zn, imm5);
+  }
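The Ldff1* gather forms above are first-fault loads: only the lowest active lane may raise a fault, and lanes past the first failure are recorded in FFR instead of trapping. Paired with Setffr and Rdffr (defined further down in this class), the usual pattern looks roughly like this (illustrative only; registers and addressing are assumptions):

// Assumes the same include and using-directive as the first sketch above.
void LdffSketch(MacroAssembler* masm) {
  masm->Setffr();                                      // mark all lanes good
  masm->Ldff1w(z0.VnS(), p0.Zeroing(), x0, z1.VnS());  // first-fault gather
  masm->Rdffr(p1.VnB(), p0.Zeroing());                 // p1 = lanes that loaded
}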
+  void Ldff1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const Register& xn,
+               const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sw(zt, pg, xn, zm);
+  }
+  void Ldff1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const ZRegister& zn,
+               int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1sw(zt, pg, zn, imm5);
+  }
+  void Ldff1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const Register& xn,
+              const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1w(zt, pg, xn, zm);
+  }
+  void Ldff1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const ZRegister& zn,
+              int imm5) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldff1w(zt, pg, zn, imm5);
+  }
+  void Ldnf1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1b(zt, pg, addr);
+  }
+  void Ldnf1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1d(zt, pg, addr);
+  }
+  void Ldnf1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1h(zt, pg, addr);
+  }
+  void Ldnf1sb(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1sb(zt, pg, addr);
+  }
+  void Ldnf1sh(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1sh(zt, pg, addr);
+  }
+  void Ldnf1sw(const ZRegister& zt,
+               const PRegisterZ& pg,
+               const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1sw(zt, pg, addr);
+  }
+  void Ldnf1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldnf1w(zt, pg, addr);
+  }
+  void Ldnt1b(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldnt1d(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldnt1h(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldnt1w(const ZRegister& zt,
+              const PRegisterZ& pg,
+              const SVEMemOperand& addr);
+  void Ldr(const CPURegister& rt, const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::ldr);
+  }
+  void Lsl(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    lsl(zd, pg, zd, shift);
+  }
+  void Lsl(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Lsl(const ZRegister& zd, const ZRegister& zn, int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsl(zd, zn, shift);
+  }
+  void Lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsl(zd, zn, zm);
+  }
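As with the arithmetic macros, the immediate shift wrappers above hide the destructive predicated encodings behind movprfx, so a three-operand form is available either way. Illustrative usage (registers and shift amounts assumed):

// Assumes the same include and using-directive as the first sketch above.
void ShiftSketch(MacroAssembler* masm) {
  masm->Lsl(z0.VnH(), p0.Merging(), z1.VnH(), 3);  // z0 = z1 << 3, active lanes
  masm->Lsr(z2.VnH(), z1.VnH(), 4);                // unpredicated form
}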
+  void Lsr(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, pg, zn);
+    lsr(zd, pg, zd, shift);
+  }
+  void Lsr(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Lsr(const ZRegister& zd, const ZRegister& zn, int shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsr(zd, zn, shift);
+  }
+  void Lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    lsr(zd, zn, zm);
+  }
+  void Mov(const PRegister& pd, const PRegister& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(pd.VnB(), pn.VnB());
+  }
+  void Mov(const PRegisterWithLaneSize& pd,
+           const PRegisterM& pg,
+           const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(pd, pg, pn);
+  }
+  void Mov(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(pd, pg, pn);
+  }
+  void Mov(const ZRegister& zd, const Register& xn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, xn);
+  }
+
+  void Mov(const ZRegister& zd, const VRegister& vn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, vn);
+  }
+
+  void Mov(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, zn);
+  }
+  void Mov(const ZRegister& zd, const ZRegister& zn, unsigned index) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, zn, index);
+  }
+  void Mov(const ZRegister& zd, const PRegister& pg, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Cpy(zd, pg, imm);
+  }
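Mov is overloaded across predicate copies, scalar and lane broadcasts, and plain vector moves; the IntegerOperand form below defers to Dup, and the predicated IntegerOperand form above to Cpy. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void MovSketch(MacroAssembler* masm) {
  masm->Mov(p1, p0);                 // whole-predicate copy (B lanes)
  masm->Mov(z0.VnD(), x0);           // broadcast an X register to all D lanes
  masm->Mov(z1.VnS(), z2.VnS(), 3);  // broadcast lane 3 of z2
}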
+  // TODO: support zeroing predicated moves using movprfx.
+  void Mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, pg, rn);
+  }
+  void Mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, pg, vn);
+  }
+  void Mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    mov(zd, pg, zn);
+  }
+  void Mov(const ZRegister& zd, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Dup(zd, imm);
+  }
+  void Movs(const PRegister& pd, const PRegister& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    movs(pd, pn);
+  }
+  void Movs(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    movs(pd, pg, pn);
+  }
+  // zd = za + (zn * zm)
+  void Mla(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& za,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  // zd = za - (zn * zm)
+  void Mls(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& za,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Mul(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+  void Nand(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nand(pd, pg, pn, pm);
+  }
+  void Nands(const PRegisterWithLaneSize& pd,
+             const PRegisterZ& pg,
+             const PRegisterWithLaneSize& pn,
+             const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nands(pd, pg, pn, pm);
+  }
+  // There is no instruction with this form, but we can implement it using
+  // `subr`.
+  void Neg(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    MovprfxHelperScope guard(this, zd, zn);
+    subr(zd, zd, 0);
+  }
+  void Neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    neg(zd, pg, zn);
+  }
+  void Nor(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nor(pd, pg, pn, pm);
+  }
+  void Nors(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nors(pd, pg, pn, pm);
+  }
+  void Not(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    not_(pd, pg, pn);
+  }
+  void Not(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    not_(zd, pg, zn);
+  }
+  void Nots(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    nots(pd, pg, pn);
+  }
+  void Orn(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orn(pd, pg, pn, pm);
+  }
+  void Orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+      orn(zd, zn, imm);
+    } else {
+      // TODO: Synthesise the immediate once 'Mov' is implemented.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
+  void Orns(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orns(pd, pg, pn, pm);
+  }
+  void Orr(const PRegisterWithLaneSize& pd,
+           const PRegisterZ& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orr(pd, pg, pn, pm);
+  }
+  void Orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (IsImmLogical(imm, zd.GetLaneSizeInBits())) {
+      orr(zd, zn, imm);
+    } else {
+      // TODO: Synthesise the immediate once 'Mov' is implemented.
+      VIXL_UNIMPLEMENTED();
+    }
+  }
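The unpredicated Neg above is a neat instance of the `subr` trick its comment describes: movprfx copies zn into zd, then `subr zd, zd, #0` computes 0 - zd, giving an element-wise negation with no predicate or zero register required. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void NegSketch(MacroAssembler* masm) {
  masm->Neg(z0.VnS(), z1.VnS());  // z0 = -z1 in every lane
}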
+  void Orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+    SingleEmissionCheckScope guard(this);
+    orr(zd.VnD(), zn.VnD(), zm.VnD());
+  }
+  void Orrs(const PRegisterWithLaneSize& pd,
+            const PRegisterZ& pg,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orrs(pd, pg, pn, pm);
+  }
+  void Orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    orv(vd, pg, zn);
+  }
+  void Pfalse(const PRegister& pd) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(pd.IsUnqualified());
+    SingleEmissionCheckScope guard(this);
+    // No matter what the lane size is, overall this operation just writes
+    // zeros throughout the register.
+    pfalse(pd.VnB());
+  }
+  void Pfirst(const PRegisterWithLaneSize& pd,
+              const PRegister& pg,
+              const PRegisterWithLaneSize& pn);
+  void Pnext(const PRegisterWithLaneSize& pd,
+             const PRegister& pg,
+             const PRegisterWithLaneSize& pn);
+  void Prfb(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfb(prfop, pg, addr);
+  }
+  void Prfh(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfh(prfop, pg, addr);
+  }
+  void Prfw(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfw(prfop, pg, addr);
+  }
+  void Prfd(PrefetchOperation prfop,
+            const PRegister& pg,
+            const SVEMemOperand addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    prfd(prfop, pg, addr);
+  }
+  void Ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ptest(pg, pn);
+  }
+  void Ptrue(const PRegisterWithLaneSize& pd,
+             SVEPredicateConstraint pattern,
+             FlagsUpdate s);
+  void Ptrue(const PRegisterWithLaneSize& pd,
+             SVEPredicateConstraint pattern = SVE_ALL) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ptrue(pd, pattern);
+  }
+  void Ptrues(const PRegisterWithLaneSize& pd,
+              SVEPredicateConstraint pattern = SVE_ALL) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ptrues(pd, pattern);
+  }
+  void Punpkhi(const PRegisterWithLaneSize& pd,
+               const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    punpkhi(pd, pn);
+  }
+  void Punpklo(const PRegisterWithLaneSize& pd,
+               const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    punpklo(pd, pn);
+  }
+  void Rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rbit(zd, pg, zn);
+  }
+  void Rdffr(const PRegister& pd) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    // Although this is essentially just a move, it writes every bit and so
+    // can only support b-sized lanes, because other lane sizes would
+    // implicitly clear bits in `pd`.
+    VIXL_ASSERT(!pd.HasLaneSize() || pd.IsLaneSizeB());
+    VIXL_ASSERT(pd.IsUnqualified());
+    SingleEmissionCheckScope guard(this);
+    rdffr(pd.VnB());
+  }
+  void Rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rdffr(pd, pg);
+  }
+  void Rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rdffrs(pd, pg);
+  }
+  // Note that there is no `rdpl` instruction, but this macro emulates it
+  // (for symmetry with `Rdvl`).
+  void Rdpl(const Register& xd, int64_t multiplier) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Addpl(xd, xzr, multiplier);
+  }
+  void Rdvl(const Register& xd, int64_t multiplier) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Addvl(xd, xzr, multiplier);
+  }
+  void Rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rev(pd, pn);
+  }
+  void Rev(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    rev(zd, zn);
+  }
+  void Revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    revb(zd, pg, zn);
+  }
+  void Revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    revh(zd, pg, zn);
+  }
+  void Revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    revw(zd, pg, zn);
+  }
+  void Saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    saddv(dd, pg, zn);
+  }
+  void Scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    scvtf(zd, pg, zn);
+  }
+  void Sdiv(const ZRegister& zd,
+            const PRegisterM& pg,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Sdot(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm);
+  void Sdot(const ZRegister& zd,
+            const ZRegister& za,
+            const ZRegister& zn,
+            const ZRegister& zm,
+            int index);
+  void Sel(const PRegisterWithLaneSize& pd,
+           const PRegister& pg,
+           const PRegisterWithLaneSize& pn,
+           const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sel(pd, pg, pn, pm);
+  }
+  void Sel(const ZRegister& zd,
+           const PRegister& pg,
+           const ZRegister& zn,
+           const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sel(zd, pg, zn, zm);
+  }
+  void Setffr() {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    setffr();
+  }
+  void Smax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+  void Smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    smaxv(vd, pg, zn);
+  }
+  void Smin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm);
+  void Sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sminv(vd, pg, zn);
+  }
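Rdvl reads multiples of the vector length in bytes; since there is no `rdpl` instruction, the Rdpl macro above synthesises the predicate-length equivalent with Addpl on xzr. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void RdSketch(MacroAssembler* masm) {
  masm->Rdvl(x0, 1);  // x0 = vector length in bytes
  masm->Rdpl(x1, 1);  // x1 = predicate length in bytes (vector length / 8)
}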
+  void Splice(const ZRegister& zd,
+              const PRegister& pg,
+              const ZRegister& zn,
+              const ZRegister& zm);
+  void Sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqadd(zd, zn, zm);
+  }
+  void Sqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(imm.IsUint8() ||
+                (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+    MovprfxHelperScope guard(this, zd, zn);
+    sqadd(zd, zd, imm.AsUint16());
+  }
+  void Sqdecb(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecb(xd, wn, pattern, multiplier);
+  }
+  void Sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecb(rdn, pattern, multiplier);
+  }
+  void Sqdecd(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecd(xd, wn, pattern, multiplier);
+  }
+  void Sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecd(rdn, pattern, multiplier);
+  }
+  void Sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecd(zdn, pattern, multiplier);
+  }
+  void Sqdech(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdech(xd, wn, pattern, multiplier);
+  }
+  void Sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdech(rdn, pattern, multiplier);
+  }
+  void Sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdech(zdn, pattern, multiplier);
+  }
+  void Sqdecp(const Register& xdn,
+              const PRegisterWithLaneSize& pg,
+              const Register& wdn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecp(xdn, pg, wdn);
+  }
+  void Sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecp(xdn, pg);
+  }
+  void Sqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameFormat(zd, zn));
+    // `sqdecp` writes every lane, so use an unpredicated movprfx.
+    MovprfxHelperScope guard(this, zd, zn);
+    sqdecp(zd, pg);
+  }
+  void Sqdecp(const ZRegister& zdn, const PRegister& pg) {
+    Sqdecp(zdn, pg, zdn);
+  }
+  void Sqdecw(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecw(xd, wn, pattern, multiplier);
+  }
+  void Sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecw(rdn, pattern, multiplier);
+  }
+  void Sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqdecw(zdn, pattern, multiplier);
+  }
+  void Sqincb(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincb(xd, wn, pattern, multiplier);
+  }
+  void Sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincb(rdn, pattern, multiplier);
+  }
+  void Sqincd(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincd(xd, wn, pattern, multiplier);
+  }
+  void Sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincd(rdn, pattern, multiplier);
+  }
+  void Sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincd(zdn, pattern, multiplier);
+  }
+  void Sqinch(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqinch(xd, wn, pattern, multiplier);
+  }
+  void Sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqinch(rdn, pattern, multiplier);
+  }
+  void Sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqinch(zdn, pattern, multiplier);
+  }
+  void Sqincp(const Register& xdn,
+              const PRegisterWithLaneSize& pg,
+              const Register& wdn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincp(xdn, pg, wdn);
+  }
+  void Sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincp(xdn, pg);
+  }
+  void Sqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(AreSameFormat(zd, zn));
+    // `sqincp` writes every lane, so use an unpredicated movprfx.
+    MovprfxHelperScope guard(this, zd, zn);
+    sqincp(zd, pg);
+  }
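The Sqdecp/Sqincp wrappers adjust a scalar by the number of active predicate lanes, saturating rather than wrapping; the three-operand form saturates at 32 bits and sign-extends the result into the X register. Illustrative usage (registers assumed):

// Assumes the same include and using-directive as the first sketch above.
void SqdecpSketch(MacroAssembler* masm) {
  masm->Sqdecp(x0, p0.VnD(), w0);  // x0 = 32-bit saturated (w0 - #active lanes)
  masm->Sqdecp(x1, p0.VnD());      // 64-bit saturating decrement
}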
+  void Sqincp(const ZRegister& zdn, const PRegister& pg) {
+    Sqincp(zdn, pg, zdn);
+  }
+  void Sqincw(const Register& xd,
+              const Register& wn,
+              int pattern = SVE_ALL,
+              int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincw(xd, wn, pattern, multiplier);
+  }
+  void Sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincw(rdn, pattern, multiplier);
+  }
+  void Sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqincw(zdn, pattern, multiplier);
+  }
+  void Sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sqsub(zd, zn, zm);
+  }
+  void Sqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(imm.IsUint8() ||
+                (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0)));
+    MovprfxHelperScope guard(this, zd, zn);
+    sqsub(zd, zd, imm.AsUint16());
+  }
+  void St1b(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St1h(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St1w(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St1d(const ZRegister& zt,
+            const PRegister& pg,
+            const SVEMemOperand& addr);
+  void St2b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2b(zt1, zt2, pg, addr);
+  }
+  void St2h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2h(zt1, zt2, pg, addr);
+  }
+  void St2w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2w(zt1, zt2, pg, addr);
+  }
+  void St2d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st2d(zt1, zt2, pg, addr);
+  }
+  void St3b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3b(zt1, zt2, zt3, pg, addr);
+  }
+  void St3h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3h(zt1, zt2, zt3, pg, addr);
+  }
+  void St3w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3w(zt1, zt2, zt3, pg, addr);
+  }
+  void St3d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st3d(zt1, zt2, zt3, pg, addr);
+  }
+  void St4b(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4b(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void St4h(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4h(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void St4w(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4w(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void St4d(const ZRegister& zt1,
+            const ZRegister& zt2,
+            const ZRegister& zt3,
+            const ZRegister& zt4,
+            const PRegister& pg,
+            const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    st4d(zt1, zt2, zt3, zt4, pg, addr);
+  }
+  void Stnt1b(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Stnt1d(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Stnt1h(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Stnt1w(const ZRegister& zt,
+              const PRegister& pg,
+              const SVEMemOperand& addr);
+  void Str(const CPURegister& rt, const SVEMemOperand& addr) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::str);
+  }
+  void Sub(const ZRegister& zd,
+           const PRegisterM& pg,
+           const ZRegister& zn,
+           const ZRegister& zm);
+  void Sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sub(zd, zn, zm);
+  }
+  void Sub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    AddSubHelper(kSubImmediate, zd, zn, imm);
+  }
+  void Sub(const ZRegister& zd, IntegerOperand imm, const ZRegister& zm);
+  void Sunpkhi(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sunpkhi(zd, zn);
+  }
+  void Sunpklo(const ZRegister& zd, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sunpklo(zd, zn);
+  }
+  void Sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sxtb(zd, pg, zn);
+  }
+  void Sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sxth(zd, pg, zn);
+  }
+  void Sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    sxtw(zd, pg, zn);
+  }
+  void Tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    tbl(zd, zn, zm);
+  }
+  void Trn1(const PRegisterWithLaneSize& pd,
+            const PRegisterWithLaneSize& pn,
+            const PRegisterWithLaneSize& pm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    trn1(pd, pn, pm);
+  }
VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn1(zd, zn, zm); + } + void Trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn2(pd, pn, pm); + } + void Trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn2(zd, zn, zm); + } + void Uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddv(dd, pg, zn); + } + void Ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ucvtf(zd, pg, zn); + } + void Udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umaxv(vd, pg, zn); + } + void Umin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uminv(vd, pg, zn); + } + void Uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqadd(zd, zn, zm); + } + void Uqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + uqadd(zd, zd, imm.AsUint16()); + } + void Uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecb(rdn, pattern, multiplier); + } + void Uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecd(rdn, pattern, multiplier); + } + void Uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecd(zdn, pattern, multiplier); + } + void Uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdech(rdn, pattern, multiplier); + } + void Uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdech(zdn, pattern, multiplier); + } + // The saturation is based on the size of `rn`. The result is zero-extended + // into `rd`, which must be at least as big. 
+ void Uqdecp(const Register& rd, + const PRegisterWithLaneSize& pg, + const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rd.Aliases(rn)); + VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); + SingleEmissionCheckScope guard(this); + if (rn.Is64Bits()) { + uqdecp(rd, pg); + } else { + // Convert <Xd> into <Wd>, to make this more consistent with Sqdecp. + uqdecp(rd.W(), pg); + } + } + void Uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { + Uqdecp(rdn, pg, rdn); + } + void Uqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `uqdecp` writes every lane, so use an unpredicated movprfx. + MovprfxHelperScope guard(this, zd, zn); + uqdecp(zd, pg); + } + void Uqdecp(const ZRegister& zdn, const PRegister& pg) { + Uqdecp(zdn, pg, zdn); + } + void Uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecw(rdn, pattern, multiplier); + } + void Uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecw(zdn, pattern, multiplier); + } + void Uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincb(rdn, pattern, multiplier); + } + void Uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincd(rdn, pattern, multiplier); + } + void Uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincd(zdn, pattern, multiplier); + } + void Uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqinch(rdn, pattern, multiplier); + } + void Uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqinch(zdn, pattern, multiplier); + } + // The saturation is based on the size of `rn`. The result is zero-extended + // into `rd`, which must be at least as big. + void Uqincp(const Register& rd, + const PRegisterWithLaneSize& pg, + const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rd.Aliases(rn)); + VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); + SingleEmissionCheckScope guard(this); + if (rn.Is64Bits()) { + uqincp(rd, pg); + } else { + // Convert <Xd> into <Wd>, to make this more consistent with Sqincp. + uqincp(rd.W(), pg); + } + } + void Uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { + Uqincp(rdn, pg, rdn); + }
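The saturating-counter semantics described above are easiest to see with a concrete use. A minimal, hypothetical sketch follows; the element count, the use of p0, and the helper name are illustrative assumptions, not part of this patch.

#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl::aarch64;

// Track a 32-bit element count and decrement it, with saturation, by the
// number of active lanes in p0. Because `rn` is the W-sized view, saturation
// is applied at 32 bits (the unsigned count floors at zero), and the result
// is zero-extended into the X register, as the comment above describes.
void DecrementLoopCount(MacroAssembler* masm) {
  UseScratchRegisterScope temps(masm);
  Register count = temps.AcquireX();
  masm->Mov(count.W(), 42);  // Assumed initial element count.
  masm->Uqdecp(count, p0.VnB(), count.W());
}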
+ void Uqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `uqincp` writes every lane, so use an unpredicated movprfx. + MovprfxHelperScope guard(this, zd, zn); + uqincp(zd, pg); + } + void Uqincp(const ZRegister& zdn, const PRegister& pg) { + Uqincp(zdn, pg, zdn); + } + void Uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincw(rdn, pattern, multiplier); + } + void Uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincw(zdn, pattern, multiplier); + } + void Uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqsub(zd, zn, zm); + } + void Uqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + uqsub(zd, zd, imm.AsUint16()); + } + void Uunpkhi(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uunpkhi(zd, zn); + } + void Uunpklo(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uunpklo(zd, zn); + } + void Uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxtb(zd, pg, zn); + } + void Uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxth(zd, pg, zn); + } + void Uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxtw(zd, pg, zn); + } + void Uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp1(pd, pn, pm); + } + void Uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp1(zd, zn, zm); + } + void Uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp2(pd, pn, pm); + } + void Uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp2(zd, zn, zm); + } + void Whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilele(pd, rn, rm); + } + void Whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilelo(pd, rn, rm); + } + void Whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilels(pd, rn, rm); + } + void Whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilelt(pd, rn, rm); + } + void Wrffr(const
PRegister& pn) { + VIXL_ASSERT(allow_macro_instructions_); + // Although this is essentially just a move, it writes every bit and so can + // only support b-sized lane because other lane sizes would implicitly clear + // bits in `ffr`. + VIXL_ASSERT(!pn.HasLaneSize() || pn.IsLaneSizeB()); + VIXL_ASSERT(pn.IsUnqualified()); + SingleEmissionCheckScope guard(this); + wrffr(pn.VnB()); + } + void Zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip1(pd, pn, pm); + } + void Zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip1(zd, zn, zm); + } + void Zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip2(pd, pn, pm); + } + void Zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip2(zd, zn, zm); + } + template <typename T> Literal<T>* CreateLiteralDestroyedWithPool(T value) { return new Literal<T>(value, @@ -3480,11 +6489,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { return GetScratchRegisterList(); } - CPURegList* GetScratchVRegisterList() { return &fptmp_list_; } + CPURegList* GetScratchVRegisterList() { return &v_tmp_list_; } VIXL_DEPRECATED("GetScratchVRegisterList", CPURegList* FPTmpList()) { return GetScratchVRegisterList(); } + CPURegList* GetScratchPRegisterList() { return &p_tmp_list_; } + // Get or set the current (most-deeply-nested) UseScratchRegisterScope. void SetCurrentScratchRegisterScope(UseScratchRegisterScope* scope) { current_scratch_scope_ = scope; @@ -3548,16 +6559,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // Will output the flags. void Log(TraceParameters parameters); - // Enable or disable instrumentation when an Instrument visitor is attached to - // the simulator. - void EnableInstrumentation(); - void DisableInstrumentation(); - - // Add a marker to the instrumentation data produced by an Instrument visitor. - // The name is a two character string that will be attached to the marker in - // the output data. - void AnnotateInstrumentation(const char* marker_name); - // Enable or disable CPU features dynamically. This mechanism allows users to // strictly check the use of CPU features in different regions of code. void SetSimulatorCPUFeatures(const CPUFeatures& features); @@ -3661,6 +6662,36 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { Condition cond, bool* should_synthesise_left); + // Generate code to calculate the address represented by `addr` and write it + // into `xd`. This is used as a common fall-back for out-of-range load and + // store operands. + // + // The vl_divisor_log2 argument is used to scale the VL, for use with + // SVE_MUL_VL. + void CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + int vl_divisor_log2 = 0); + + void CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + const CPURegister& rt) { + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + int vl_divisor_log2 = rt.IsPRegister() ? 
kZRegBitsPerPRegBitLog2 : 0; + CalculateSVEAddress(xd, addr, vl_divisor_log2); + } + + void SetFPNaNPropagationOption(FPMacroNaNPropagationOption nan_option) { + fp_nan_propagation_ = nan_option; + } + + void ResolveFPNaNPropagationOption(FPMacroNaNPropagationOption* nan_option) { + // The input option has priority over the option that has been set. + if (*nan_option == NoFPMacroNaNPropagationSelected) { + *nan_option = fp_nan_propagation_; + } + VIXL_ASSERT(*nan_option != NoFPMacroNaNPropagationSelected); + } + private: // The actual Push and Pop implementations. These don't generate any code // other than that required for the push or pop. This allows @@ -3714,6 +6745,183 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { void ConfigureSimulatorCPUFeaturesHelper(const CPUFeatures& features, DebugHltOpcode action); + void CompareHelper(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm); + + // E.g. Ld1rb. + typedef void (Assembler::*SVELoadBroadcastFn)(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + void SVELoadBroadcastImmHelper(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + SVELoadBroadcastFn fn, + int divisor); + + // E.g. ldr/str + typedef void (Assembler::*SVELoadStoreFn)(const CPURegister& rt, + const SVEMemOperand& addr); + + void SVELoadStoreScalarImmHelper(const CPURegister& rt, + const SVEMemOperand& addr, + SVELoadStoreFn fn); + + typedef void (Assembler::*SVELoad1Fn)(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + typedef void (Assembler::*SVEStore1Fn)(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Helper for predicated Z register loads with addressing modes not directly + // encodable in the instruction. The supported_modifier parameter indicates + // which offset modifier the calling instruction encoder supports (e.g. + // SVE_MUL_VL). The log2 of the ratio of VL to memory access size is passed as + // vl_divisor_log2; pass -1 to indicate no dependency.
+ template <typename Tg, typename Tf> + void SVELoadStoreScalarImmHelper( + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn, + int imm_bits, + int shift_amount, + SVEOffsetModifier supported_modifier = NO_SVE_OFFSET_MODIFIER, + int vl_divisor_log2 = 0); + + template <typename Tg, typename Tf> + void SVELoadStore1Helper(int msize_in_bytes_log2, + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn); + + template <typename Tf> + void SVELoadFFHelper(int msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + Tf fn); + + typedef void (MacroAssembler::*IntWideImmMacroFn)(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + typedef void (Assembler::*IntWideImmShiftFn)(const ZRegister& zd, + const ZRegister& zn, + int imm, + int shift); + + typedef void (Assembler::*IntArithFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm); + + typedef void (Assembler::*IntWideImmFn)(const ZRegister& zd, + const ZRegister& zn, + int imm); + + typedef void (Assembler::*IntArithIndexFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + typedef void (MacroAssembler::*SVEArithPredicatedFn)(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + void IntWideImmHelper(IntWideImmFn imm_fn, + SVEArithPredicatedFn reg_fn, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm, + bool is_signed_imm); + + enum AddSubHelperOption { kAddImmediate, kSubImmediate }; + + void AddSubHelper(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + // Try to emit an add- or sub-like instruction (imm_fn) with `imm`, or the + // corresponding sub- or add-like instruction (n_imm_fn) with a negated `imm`. + // A `movprfx` is automatically generated if one is required. If successful, + // return true. Otherwise, return false. + // + // This helper uses two's complement equivalences, for example treating 0xffff + // as -1 for H-sized lanes. + bool TrySingleAddSub(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + void SVESdotUdotHelper(IntArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void SVESdotUdotIndexHelper(IntArithIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // For noncommutative arithmetic operations. + void NoncommutativeArithmeticHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + SVEArithPredicatedFn rev_fn); + + void FPCommutativeArithmeticHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + FPMacroNaNPropagationOption nan_option); + + // Floating-point fused multiply-add vectors (predicated), writing addend. + typedef void (Assembler::*SVEMulAddPredicatedZdaFn)(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-add vectors (predicated), writing + // multiplicand. 
+ typedef void (Assembler::*SVEMulAddPredicatedZdnFn)(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + void FPMulAddHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + SVEMulAddPredicatedZdaFn fn_zda, + SVEMulAddPredicatedZdnFn fn_zdn, + FPMacroNaNPropagationOption nan_option); + + typedef void (Assembler::*SVEMulAddIndexFn)(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + void FPMulAddIndexHelper(SVEMulAddIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + // Tell whether any of the macro instruction can be used. When false the // MacroAssembler will assert if a method which can emit a variable number // of instructions is called. @@ -3727,7 +6935,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { // Scratch registers available for use by the MacroAssembler. CPURegList tmp_list_; - CPURegList fptmp_list_; + CPURegList v_tmp_list_; + CPURegList p_tmp_list_; UseScratchRegisterScope* current_scratch_scope_; @@ -3737,6 +6946,8 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { ptrdiff_t checkpoint_; ptrdiff_t recommended_checkpoint_; + FPMacroNaNPropagationOption fp_nan_propagation_; + friend class Pool; friend class LiteralPool; }; @@ -3805,11 +7016,35 @@ class BlockPoolsScope { MacroAssembler* masm_; }; +MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const ZRegister& src) + : ExactAssemblyScope(masm, + ShouldGenerateMovprfx(dst, src) + ? (2 * kInstructionSize) + : kInstructionSize) { + if (ShouldGenerateMovprfx(dst, src)) { + masm->movprfx(dst, src); + } +} + +MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const PRegister& pg, + const ZRegister& src) + : ExactAssemblyScope(masm, + ShouldGenerateMovprfx(dst, pg, src) + ? (2 * kInstructionSize) + : kInstructionSize) { + if (ShouldGenerateMovprfx(dst, pg, src)) { + masm->movprfx(dst, pg, src); + } +} // This scope utility allows scratch registers to be managed safely. The -// MacroAssembler's GetScratchRegisterList() (and GetScratchVRegisterList()) is -// used as a pool of scratch registers. These registers can be allocated on -// demand, and will be returned at the end of the scope. +// MacroAssembler's GetScratch*RegisterList() are used as a pool of scratch +// registers. These registers can be allocated on demand, and will be returned +// at the end of the scope. // // When the scope ends, the MacroAssembler's lists will be restored to their // original state, even if the lists were modified by some other means. @@ -3819,14 +7054,22 @@ class UseScratchRegisterScope { // must not be `NULL`), so it is ready to use immediately after it has been // constructed. explicit UseScratchRegisterScope(MacroAssembler* masm) - : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) { + : masm_(NULL), + parent_(NULL), + old_available_(0), + old_available_v_(0), + old_available_p_(0) { Open(masm); } // This constructor does not implicitly initialise the scope. Instead, the // user is required to explicitly call the `Open` function before using the // scope. 
UseScratchRegisterScope() - : masm_(NULL), parent_(NULL), old_available_(0), old_available_v_(0) {} + : masm_(NULL), + parent_(NULL), + old_available_(0), + old_available_v_(0), + old_available_p_(0) {} // This function performs the actual initialisation work. void Open(MacroAssembler* masm); @@ -3841,25 +7084,42 @@ class UseScratchRegisterScope { bool IsAvailable(const CPURegister& reg) const; - // Take a register from the appropriate temps list. It will be returned // automatically when the scope ends. Register AcquireW() { - return AcquireNextAvailable(masm_->GetScratchRegisterList()).W(); + return AcquireFrom(masm_->GetScratchRegisterList()).W(); } Register AcquireX() { - return AcquireNextAvailable(masm_->GetScratchRegisterList()).X(); + return AcquireFrom(masm_->GetScratchRegisterList()).X(); } VRegister AcquireH() { - return AcquireNextAvailable(masm_->GetScratchVRegisterList()).H(); + return AcquireFrom(masm_->GetScratchVRegisterList()).H(); } VRegister AcquireS() { - return AcquireNextAvailable(masm_->GetScratchVRegisterList()).S(); + return AcquireFrom(masm_->GetScratchVRegisterList()).S(); } VRegister AcquireD() { - return AcquireNextAvailable(masm_->GetScratchVRegisterList()).D(); + return AcquireFrom(masm_->GetScratchVRegisterList()).D(); + } + ZRegister AcquireZ() { + return AcquireFrom(masm_->GetScratchVRegisterList()).Z(); + } + PRegister AcquireP() { + // Prefer to allocate p8-p15 if we can, to leave p0-p7 available for use as + // governing predicates. + CPURegList* available = masm_->GetScratchPRegisterList(); + RegList preferred = ~kGoverningPRegisterMask; + if ((available->GetList() & preferred) != 0) { + return AcquireFrom(available, preferred).P(); + } + return AcquireFrom(available).P(); + } + // Acquire a P register suitable for use as a governing predicate in + // instructions which only accept p0-p7 for that purpose. + PRegister AcquireGoverningP() { + CPURegList* available = masm_->GetScratchPRegisterList(); + return AcquireFrom(available, kGoverningPRegisterMask).P(); } - Register AcquireRegisterOfSize(int size_in_bits); Register AcquireSameSizeAs(const Register& reg) { @@ -3875,6 +7135,12 @@ class UseScratchRegisterScope { : CPURegister(AcquireRegisterOfSize(size_in_bits)); } + // Acquire a register big enough to represent one lane of `vector`. + Register AcquireRegisterToHoldLane(const CPURegister& vector) { + VIXL_ASSERT(vector.GetLaneSizeInBits() <= kXRegSize); + return (vector.GetLaneSizeInBits() > kWRegSize) ? AcquireX() : AcquireW(); + } + // Explicitly release an acquired (or excluded) register, putting it back in // the appropriate temps list. @@ -3892,6 +7158,10 @@ class UseScratchRegisterScope { const VRegister& reg2 = NoVReg, const VRegister& reg3 = NoVReg, const VRegister& reg4 = NoVReg); + void Include(const CPURegister& reg1, + const CPURegister& reg2 = NoCPUReg, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); // Make sure that the specified registers are not available in this scope. @@ -3911,21 +7181,40 @@ class UseScratchRegisterScope { const CPURegister& reg3 = NoCPUReg, const CPURegister& reg4 = NoCPUReg); + // Convenience for excluding registers that are part of Operands. This is + // useful for sequences like this: + // + // // Use 'rd' as a scratch, but only if it's not aliased by an input. + // temps.Include(rd); + // temps.Exclude(rn); + // temps.Exclude(operand); + // + // Otherwise, a conditional check is needed on the last 'Exclude'. 
+ void Exclude(const Operand& operand) { + if (operand.IsShiftedRegister() || operand.IsExtendedRegister()) { + Exclude(operand.GetRegister()); + } else { + VIXL_ASSERT(operand.IsImmediate()); + } + } // Prevent any scratch registers from being used in this scope. void ExcludeAll(); private: - static CPURegister AcquireNextAvailable(CPURegList* available); + static CPURegister AcquireFrom(CPURegList* available, + RegList mask = ~static_cast<RegList>(0)); static void ReleaseByCode(CPURegList* available, int code); - static void ReleaseByRegList(CPURegList* available, RegList regs); - static void IncludeByRegList(CPURegList* available, RegList exclude); - static void ExcludeByRegList(CPURegList* available, RegList exclude); + CPURegList* GetAvailableListFor(CPURegister::RegisterBank bank); + + static const RegList kGoverningPRegisterMask = + (static_cast<RegList>(1) << kNumberOfGoverningPRegisters) - 1; + // The MacroAssembler maintains a list of available scratch registers, and // also keeps track of the most recently-opened scope so that on destruction // we can check that scopes do not outlive their parents. @@ -3934,7 +7223,8 @@ class UseScratchRegisterScope { // The state of the available lists at the start of this scope. RegList old_available_; // kRegister - RegList old_available_v_; // kVRegister + RegList old_available_v_; // kVRegister / kZRegister + RegList old_available_p_; // kPRegister // Disallow copy constructor and operator=. VIXL_NO_RETURN_IN_DEBUG_MODE UseScratchRegisterScope( @@ -3955,23 +7245,11 @@ class UseScratchRegisterScope { // features needs a corresponding macro instruction. class SimulationCPUFeaturesScope { public: - explicit SimulationCPUFeaturesScope( - MacroAssembler* masm, - CPUFeatures::Feature feature0 = CPUFeatures::kNone, - CPUFeatures::Feature feature1 = CPUFeatures::kNone, - CPUFeatures::Feature feature2 = CPUFeatures::kNone, - CPUFeatures::Feature feature3 = CPUFeatures::kNone) - : masm_(masm), - cpu_features_scope_(masm, feature0, feature1, feature2, feature3) { - masm_->SaveSimulatorCPUFeatures(); - masm_->EnableSimulatorCPUFeatures( - CPUFeatures(feature0, feature1, feature2, feature3)); - } - - SimulationCPUFeaturesScope(MacroAssembler* masm, const CPUFeatures& other) - : masm_(masm), cpu_features_scope_(masm, other) { + template <typename... T> + explicit SimulationCPUFeaturesScope(MacroAssembler* masm, T... features) + : masm_(masm), cpu_features_scope_(masm, features...) { masm_->SaveSimulatorCPUFeatures(); - masm_->EnableSimulatorCPUFeatures(other); + masm_->EnableSimulatorCPUFeatures(CPUFeatures(features...)); } ~SimulationCPUFeaturesScope() { masm_->RestoreSimulatorCPUFeatures(); } diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc new file mode 100644 index 00000000..b107f132 --- /dev/null +++ b/src/aarch64/macro-assembler-sve-aarch64.cc @@ -0,0 +1,2027 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "macro-assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +void MacroAssembler::AddSubHelper(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(imm.FitsInLane(zd)); + + // Simple, encodable cases. + if (TrySingleAddSub(option, zd, zn, imm)) return; + + VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate)); + bool add_imm = (option == kAddImmediate); + + // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one + // instruction. Also interpret the immediate as signed, so we can convert + // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc. + IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits())); + if (signed_imm.IsNegative()) { + AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate; + IntegerOperand n_imm(signed_imm.GetMagnitude()); + // IntegerOperand can represent -INT_MIN, so this is always safe. + VIXL_ASSERT(n_imm.IsPositiveOrZero()); + if (TrySingleAddSub(n_option, zd, zn, n_imm)) return; + } + + // Otherwise, fall back to dup + ADD_z_z/SUB_z_z. + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Dup(scratch, imm); + + SingleEmissionCheckScope guard(this); + if (add_imm) { + add(zd, zn, scratch); + } else { + sub(zd, zn, scratch); + } +} + +bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(imm.FitsInLane(zd)); + + int imm8; + int shift = -1; + if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { + MovprfxHelperScope guard(this, zd, zn); + switch (option) { + case kAddImmediate: + add(zd, zd, imm8, shift); + return true; + case kSubImmediate: + sub(zd, zd, imm8, shift); + return true; + } + } + return false; +} + +void MacroAssembler::IntWideImmHelper(IntWideImmFn imm_fn, + SVEArithPredicatedFn reg_macro, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm, + bool is_signed) { + if (is_signed) { + // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi + if (imm.IsInt8()) { + MovprfxHelperScope guard(this, zd, zn); + (this->*imm_fn)(zd, zd, imm.AsInt8()); + return; + } + } else { + // E.g.
UMIN_z_zi, UMAX_z_zi + if (imm.IsUint8()) { + MovprfxHelperScope guard(this, zd, zn); + (this->*imm_fn)(zd, zd, imm.AsUint8()); + return; + } + } + + UseScratchRegisterScope temps(this); + PRegister pg = temps.AcquireGoverningP(); + Ptrue(pg.WithSameLaneSizeAs(zd)); + + // Try to re-use zd if we can, so we can avoid a movprfx. + ZRegister scratch = + zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()) + : zd; + Dup(scratch, imm); + + // The vector-form macro for commutative operations will swap the arguments to + // avoid movprfx, if necessary. + (this->*reg_macro)(zd, pg.Merging(), zn, scratch); +} + +void MacroAssembler::Mul(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + IntWideImmFn imm_fn = &Assembler::mul; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Smin(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInSignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::smin; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Smax(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInSignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::smax; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Umax(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::umax; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); +} + +void MacroAssembler::Umin(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); + IntWideImmFn imm_fn = &Assembler::umin; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); +} + +void MacroAssembler::Addpl(const Register& xd, + const Register& xn, + int64_t multiplier) { + VIXL_ASSERT(allow_macro_instructions_); + + // This macro relies on `Rdvl` to handle some out-of-range cases. Check that + // `VL * multiplier` cannot overflow, for any possible value of VL. + VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); + VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); + + if (xd.IsZero()) return; + if (xn.IsZero() && xd.IsSP()) { + // TODO: This operation doesn't make much sense, but we could support it + // with a scratch register if necessary. + VIXL_UNIMPLEMENTED(); + } + + // Handling xzr requires an extra move, so defer it until later so we can try + // to use `rdvl` instead (via `Addvl`). + if (IsInt6(multiplier) && !xn.IsZero()) { + SingleEmissionCheckScope guard(this); + addpl(xd, xn, static_cast<int>(multiplier)); + return; + } + + // If `multiplier` is a multiple of 8, we can use `Addvl` instead. + if ((multiplier % kZRegBitsPerPRegBit) == 0) { + Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit); + return; + } + + if (IsInt6(multiplier)) { + VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`. + // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so + // materialise a zero. 
+ MacroEmissionCheckScope guard(this); + movz(xd, 0); + addpl(xd, xd, static_cast<int>(multiplier)); + return; + } + + // TODO: Some probable cases result in rather long sequences. For example, + // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just + // outside the encodable range. We should look for ways to cover such cases + // without drastically increasing the complexity of this logic. + + // For other cases, calculate xn + (PL * multiplier) using discrete + // instructions. This requires two scratch registers in the general case, so + // try to re-use the destination as a scratch register. + UseScratchRegisterScope temps(this); + temps.Include(xd); + temps.Exclude(xn); + + Register scratch = temps.AcquireX(); + // There is no `rdpl`, so we have to calculate PL from VL. We can't + // scale the multiplier because (we already know) it isn't a multiple of 8. + Rdvl(scratch, multiplier); + + MacroEmissionCheckScope guard(this); + if (xn.IsZero()) { + asr(xd, scratch, kZRegBitsPerPRegBitLog2); + } else if (xd.IsSP() || xn.IsSP()) { + // TODO: MacroAssembler::Add should be able to handle this. + asr(scratch, scratch, kZRegBitsPerPRegBitLog2); + add(xd, xn, scratch); + } else { + add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2)); + } +} + +void MacroAssembler::Addvl(const Register& xd, + const Register& xn, + int64_t multiplier) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + // Check that `VL * multiplier` cannot overflow, for any possible value of VL. + VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); + VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); + + if (xd.IsZero()) return; + if (xn.IsZero() && xd.IsSP()) { + // TODO: This operation doesn't make much sense, but we could support it + // with a scratch register if necessary. `rdvl` cannot write into `sp`. + VIXL_UNIMPLEMENTED(); + } + + if (IsInt6(multiplier)) { + SingleEmissionCheckScope guard(this); + if (xn.IsZero()) { + rdvl(xd, static_cast<int>(multiplier)); + } else { + addvl(xd, xn, static_cast<int>(multiplier)); + } + return; + } + + // TODO: Some probable cases result in rather long sequences. For example, + // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just + // outside the encodable range. We should look for ways to cover such cases + // without drastically increasing the complexity of this logic. + + // For other cases, calculate xn + (VL * multiplier) using discrete + // instructions. This requires two scratch registers in the general case, so + // we try to re-use the destination as a scratch register. + UseScratchRegisterScope temps(this); + temps.Include(xd); + temps.Exclude(xn); + + Register a = temps.AcquireX(); + Mov(a, multiplier); + + MacroEmissionCheckScope guard(this); + Register b = temps.AcquireX(); + rdvl(b, 1); + if (xn.IsZero()) { + mul(xd, a, b); + } else if (xd.IsSP() || xn.IsSP()) { + mul(a, a, b); + add(xd, xn, a); + } else { + madd(xd, a, b, xn); + } +} + +void MacroAssembler::CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + int vl_divisor_log2) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!addr.IsScatterGather()); + VIXL_ASSERT(xd.IsX()); + + // The lower bound is where a whole Z register is accessed. + VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0)); + // The upper bound is for P register accesses, and for instructions like + // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane.
+ VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2)); + + SVEOffsetModifier mod = addr.GetOffsetModifier(); + Register base = addr.GetScalarBase(); + + if (addr.IsEquivalentToScalar()) { + // For example: + // [x0] + // [x0, #0] + // [x0, xzr, LSL 2] + Mov(xd, base); + } else if (addr.IsScalarPlusImmediate()) { + // For example: + // [x0, #42] + // [x0, #42, MUL VL] + int64_t offset = addr.GetImmediateOffset(); + VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar. + if (addr.IsMulVl()) { + int vl_divisor = 1 << vl_divisor_log2; + // For all possible values of vl_divisor, we can simply use `Addpl`. This + // will select `addvl` if necessary. + VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0); + Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor)); + } else { + // IsScalarPlusImmediate() ensures that no other modifiers can occur. + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + Add(xd, base, offset); + } + } else if (addr.IsScalarPlusScalar()) { + // For example: + // [x0, x1] + // [x0, x1, LSL #4] + Register offset = addr.GetScalarOffset(); + VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar. + if (mod == SVE_LSL) { + Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount())); + } else { + // IsScalarPlusScalar() ensures that no other modifiers can occur. + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + Add(xd, base, offset); + } + } else { + // All other forms are scatter-gather addresses, which cannot be evaluated + // into an X register. + VIXL_UNREACHABLE(); + } +} + +void MacroAssembler::Cpy(const ZRegister& zd, + const PRegister& pg, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zd)); + int imm8; + int shift; + if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { + SingleEmissionCheckScope guard(this); + cpy(zd, pg, imm8, shift); + return; + } + + // The fallbacks rely on `cpy` variants that only support merging predication. + // If zeroing predication was requested, zero the destination first. + if (pg.IsZeroing()) { + SingleEmissionCheckScope guard(this); + dup(zd, 0); + } + PRegisterM pg_m = pg.Merging(); + + // Try to encode the immediate using fcpy. + VIXL_ASSERT(imm.FitsInLane(zd)); + if (zd.GetLaneSizeInBits() >= kHRegSize) { + double fp_imm = 0.0; + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + fp_imm = + FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN); + break; + case kSRegSize: + fp_imm = RawbitsToFloat(imm.AsUint32()); + break; + case kDRegSize: + fp_imm = RawbitsToDouble(imm.AsUint64()); + break; + default: + VIXL_UNREACHABLE(); + break; + } + // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so + // we can use IsImmFP64 for all lane sizes. + if (IsImmFP64(fp_imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg_m, fp_imm); + return; + } + } + + // Fall back to using a scratch register. + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zd); + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + cpy(zd, pg_m, scratch); +} + +// TODO: We implement Fcpy (amongst other things) for all FP types because it +// allows us to preserve user-specified NaNs. We should come up with some +// FPImmediate type to abstract this, and avoid all the duplication below (and +// elsewhere). 
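To make the TODO's NaN-preservation point concrete, here is a short hypothetical sketch; the choice of z0 and p1 and the payload value are assumptions, not part of this patch. A NaN is never an encodable FP immediate, so Fcpy falls back to Cpy with the raw bit pattern and the user-specified payload survives bit-for-bit.

#include "aarch64/macro-assembler-aarch64.h"

using namespace vixl;
using namespace vixl::aarch64;

// Broadcast a quiet NaN carrying payload 0x123 to the D-sized lanes of z0.
// IsImmFP64() rejects every NaN, so the macro routes the value through Cpy
// as a raw 64-bit pattern instead of fcpy's 8-bit immediate form, which
// cannot represent it.
void BroadcastNaNWithPayload(MacroAssembler* masm) {
  const double nan_with_payload = RawbitsToDouble(0x7ff8000000000123);
  masm->Ptrue(p1.VnD());
  masm->Fcpy(z0.VnD(), p1.Merging(), nan_with_payload);
}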
+ +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP64(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + float imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP32(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP16(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zd)); + unsigned lane_size = zd.GetLaneSizeInBits(); + int imm8; + int shift; + if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { + SingleEmissionCheckScope guard(this); + dup(zd, imm8, shift); + } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) { + SingleEmissionCheckScope guard(this); + dupm(zd, imm.AsUintN(lane_size)); + } else { + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zd); + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + dup(zd, scratch); + } +} + +void MacroAssembler::NoncommutativeArithmeticHelper( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + SVEArithPredicatedFn rev_fn) { + if (zd.Aliases(zn)) { + // E.g. zd = zd / zm + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zn, zm); + } else if (zd.Aliases(zm)) { + // E.g. zd = zn / zd + SingleEmissionCheckScope guard(this); + (this->*rev_fn)(zd, pg, zm, zn); + } else { + // E.g. zd = zn / zm + MovprfxHelperScope guard(this, zd, pg, zn); + (this->*fn)(zd, pg, zd, zm); + } +} + +void MacroAssembler::FPCommutativeArithmeticHelper( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + FPMacroNaNPropagationOption nan_option) { + ResolveFPNaNPropagationOption(&nan_option); + + if (zd.Aliases(zn)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zd, zm); + } else if (zd.Aliases(zm)) { + switch (nan_option) { + case FastNaNPropagation: { + // Swap the arguments. + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zd, zn); + return; + } + case StrictNaNPropagation: { + UseScratchRegisterScope temps(this); + // Use a scratch register to keep the argument order exactly as + // specified. 
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); + { + MovprfxHelperScope guard(this, scratch, pg, zn); + (this->*fn)(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + return; + } + case NoFPMacroNaNPropagationSelected: + VIXL_UNREACHABLE(); + return; + } + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + (this->*fn)(zd, pg, zd, zm); + } +} + +void MacroAssembler::Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::asr), + static_cast<SVEArithPredicatedFn>( + &Assembler::asrr)); +} + +void MacroAssembler::Lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::lsl), + static_cast<SVEArithPredicatedFn>( + &Assembler::lslr)); +} + +void MacroAssembler::Lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::lsr), + static_cast<SVEArithPredicatedFn>( + &Assembler::lsrr)); +} + +void MacroAssembler::Fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fdiv), + static_cast<SVEArithPredicatedFn>( + &Assembler::fdivr)); +} + +void MacroAssembler::Fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fsub), + static_cast<SVEArithPredicatedFn>( + &Assembler::fsubr)); +} + +void MacroAssembler::Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fadd), + nan_option); +} + +void MacroAssembler::Fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fabd), + nan_option); +} + +void MacroAssembler::Fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmul), + nan_option); +} + +void MacroAssembler::Fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmulx), + nan_option); +} + +void MacroAssembler::Fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption 
nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmax), + nan_option); +} + +void MacroAssembler::Fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmin), + nan_option); +} + +void MacroAssembler::Fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fmaxnm), + nan_option); +} + +void MacroAssembler::Fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::fminnm), + nan_option); +} + +void MacroAssembler::Fdup(const ZRegister& zd, double imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + Fdup(zd, Float16(imm)); + break; + case kSRegSize: + Fdup(zd, static_cast<float>(imm)); + break; + case kDRegSize: + if (IsImmFP64(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, DoubleToRawbits(imm)); + } + break; + } +} + +void MacroAssembler::Fdup(const ZRegister& zd, float imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + Fdup(zd, Float16(imm)); + break; + case kSRegSize: + if (IsImmFP32(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, FloatToRawbits(imm)); + } + break; + case kDRegSize: + Fdup(zd, static_cast<double>(imm)); + break; + } +} + +void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + if (IsImmFP16(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, Float16ToRawbits(imm)); + } + break; + case kSRegSize: + Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN)); + break; + case kDRegSize: + Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); + break; + } +} + +void MacroAssembler::Index(const ZRegister& zd, + const Operand& start, + const Operand& step) { + class IndexOperand : public Operand { + public: + static IndexOperand Prepare(MacroAssembler* masm, + UseScratchRegisterScope* temps, + const Operand& op, + const ZRegister& zd) { + // Look for encodable immediates. + int imm; + if (op.IsImmediate()) { + if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd, &imm)) { + return IndexOperand(imm); + } + Register scratch = temps->AcquireRegisterToHoldLane(zd); + masm->Mov(scratch, op); + return IndexOperand(scratch); + } else { + // Plain registers can be encoded directly. 
+ VIXL_ASSERT(op.IsPlainRegister()); + return IndexOperand(op.GetRegister()); + } + } + + int GetImm5() const { + int64_t imm = GetImmediate(); + VIXL_ASSERT(IsInt5(imm)); + return static_cast<int>(imm); + } + + private: + explicit IndexOperand(const Register& reg) : Operand(reg) {} + explicit IndexOperand(int64_t imm) : Operand(imm) {} + }; + + UseScratchRegisterScope temps(this); + IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd); + IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd); + + SingleEmissionCheckScope guard(this); + if (start_enc.IsImmediate()) { + if (step_enc.IsImmediate()) { + index(zd, start_enc.GetImm5(), step_enc.GetImm5()); + } else { + index(zd, start_enc.GetImm5(), step_enc.GetRegister()); + } + } else { + if (step_enc.IsImmediate()) { + index(zd, start_enc.GetRegister(), step_enc.GetImm5()); + } else { + index(zd, start_enc.GetRegister(), step_enc.GetRegister()); + } + } +} + +void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zdn)); + + if (imm.IsZero()) { + SingleEmissionCheckScope guard(this); + insr(zdn, xzr); + return; + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zdn); + + // TODO: There are many cases where we could optimise immediates, such as by + // detecting repeating patterns or FP immediates. We should optimise and + // abstract this for use in other SVE mov-immediate-like macros. + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + insr(zdn, scratch); +} + +void MacroAssembler::Mla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(za)) { + // zda = zda + (zn * zm) + SingleEmissionCheckScope guard(this); + mla(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = za + (zdn * zm) + SingleEmissionCheckScope guard(this); + mad(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + // Multiplication is commutative, so we can swap zn and zm. + // zdm = za + (zdm * zn) + SingleEmissionCheckScope guard(this); + mad(zd, pg, zn, za); + } else { + // zd = za + (zn * zm) + ExactAssemblyScope guard(this, 2 * kInstructionSize); + movprfx(zd, pg, za); + mla(zd, pg, zn, zm); + } +} + +void MacroAssembler::Mls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(za)) { + // zda = zda - (zn * zm) + SingleEmissionCheckScope guard(this); + mls(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = za - (zdn * zm) + SingleEmissionCheckScope guard(this); + msb(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + // Multiplication is commutative, so we can swap zn and zm. 
+ // zdm = za - (zdm * zn) + SingleEmissionCheckScope guard(this); + msb(zd, pg, zn, za); + } else { + // zd = za - (zn * zm) + ExactAssemblyScope guard(this, 2 * kInstructionSize); + movprfx(zd, pg, za); + mls(zd, pg, zn, zm); + } +} + +void MacroAssembler::CompareHelper(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + UseScratchRegisterScope temps(this); + ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Dup(zm, imm); + SingleEmissionCheckScope guard(this); + cmp(cond, pd, pg, zn, zm); +} + +void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pd.IsLaneSizeB()); + VIXL_ASSERT(pn.IsLaneSizeB()); + if (pd.Is(pn)) { + SingleEmissionCheckScope guard(this); + pfirst(pd, pg, pn); + } else { + UseScratchRegisterScope temps(this); + PRegister temp_pg = pg; + if (pd.Aliases(pg)) { + temp_pg = temps.AcquireP(); + Mov(temp_pg.VnB(), pg.VnB()); + } + Mov(pd, pn); + SingleEmissionCheckScope guard(this); + pfirst(pd, temp_pg, pd); + } +} + +void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(pd, pn)); + if (pd.Is(pn)) { + SingleEmissionCheckScope guard(this); + pnext(pd, pg, pn); + } else { + UseScratchRegisterScope temps(this); + PRegister temp_pg = pg; + if (pd.Aliases(pg)) { + temp_pg = temps.AcquireP(); + Mov(temp_pg.VnB(), pg.VnB()); + } + Mov(pd.VnB(), pn.VnB()); + SingleEmissionCheckScope guard(this); + pnext(pd, temp_pg, pd); + } +} + +void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd, + SVEPredicateConstraint pattern, + FlagsUpdate s) { + VIXL_ASSERT(allow_macro_instructions_); + switch (s) { + case LeaveFlags: + Ptrue(pd, pattern); + return; + case SetFlags: + Ptrues(pd, pattern); + return; + } + VIXL_UNREACHABLE(); +} + +void MacroAssembler::Sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::sdiv), + static_cast<SVEArithPredicatedFn>( + &Assembler::sdivr)); +} + +void MacroAssembler::Sub(const ZRegister& zd, + IntegerOperand imm, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + + int imm8; + int shift = -1; + if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { + MovprfxHelperScope guard(this, zd, zm); + subr(zd, zd, imm8, shift); + } else { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits()); + Dup(scratch, imm); + + SingleEmissionCheckScope guard(this); + sub(zd, scratch, zm); + } +} + +void MacroAssembler::Sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + &Assembler::sub), + static_cast<SVEArithPredicatedFn>( + &Assembler::subr)); +} + +void MacroAssembler::Udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + NoncommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast<SVEArithPredicatedFn>( + 
&Assembler::udiv), + static_cast<SVEArithPredicatedFn>( + &Assembler::udivr)); +} + +void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + SVELoadBroadcastFn fn, + int divisor) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + int64_t imm = addr.GetImmediateOffset(); + if ((imm % divisor == 0) && IsUint6(imm / divisor)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + } else { + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, zt); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); + } +} + +void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt, + const SVEMemOperand& addr, + SVELoadStoreFn fn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister()); + + if (addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) && + addr.IsMulVl())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase())); + return; + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, rt); + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, SVEMemOperand(scratch)); +} + +template <typename Tg, typename Tf> +void MacroAssembler::SVELoadStoreScalarImmHelper( + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn, + int imm_bits, + int shift_amount, + SVEOffsetModifier supported_modifier, + int vl_divisor_log2) { + VIXL_ASSERT(allow_macro_instructions_); + int imm_divisor = 1 << shift_amount; + + if (addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) && + ((addr.GetImmediateOffset() % imm_divisor) == 0) && + (addr.GetOffsetModifier() == supported_modifier))) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); + return; + } + + if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) && + (vl_divisor_log2 == -1)) { + // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL + // dependent. 
+ VIXL_UNIMPLEMENTED(); + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, vl_divisor_log2); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); +} + +template <typename Tg, typename Tf> +void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2, + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn) { + if (addr.IsPlainScalar() || + (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && + addr.IsEquivalentToLSL(msize_in_bytes_log2)) || + (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) && + addr.IsMulVl())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); + return; + } + + if (addr.IsVectorPlusImmediate()) { + uint64_t offset = addr.GetImmediateOffset(); + if (IsMultiple(offset, (1 << msize_in_bytes_log2)) && + IsUint5(offset >> msize_in_bytes_log2)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + } + + if (addr.IsScalarPlusVector()) { + VIXL_ASSERT(addr.IsScatterGather()); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + UseScratchRegisterScope temps(this); + if (addr.IsScatterGather()) { + // In scatter-gather modes, zt and zn/zm have the same lane size. However, + // for 32-bit accesses, the result of each lane's address calculation still + // requires 64 bits; we can't naively use `Adr` for the address calculation + // because it would truncate each address to 32 bits. + + if (addr.IsVectorPlusImmediate()) { + // Synthesise the immediate in an X register, then use a + // scalar-plus-vector access with the original vector. + Register scratch = temps.AcquireX(); + Mov(scratch, addr.GetImmediateOffset()); + SingleEmissionCheckScope guard(this); + SVEOffsetModifier om = + zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER; + (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om)); + return; + } + + VIXL_UNIMPLEMENTED(); + } else { + Register scratch = temps.AcquireX(); + // TODO: If we have an immediate offset that is a multiple of + // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to + // save an instruction. + int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2; + CalculateSVEAddress(scratch, addr, vl_divisor_log2); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); + } +} + +template <typename Tf> +void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + Tf fn) { + if (addr.IsScatterGather()) { + // Scatter-gather first-fault loads share encodings with normal loads. + SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn); + return; + } + + // Contiguous first-faulting loads have no scalar-plus-immediate form at all, + // so we don't do immediate synthesis. + + // We cannot currently distinguish "[x0]" from "[x0, #0]", and this + // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here. 
+ if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() && + addr.IsEquivalentToLSL(msize_in_bytes_log2))) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + VIXL_UNIMPLEMENTED(); +} + +void MacroAssembler::Ld1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1b)); +} + +void MacroAssembler::Ld1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1h)); +} + +void MacroAssembler::Ld1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kWRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1w)); +} + +void MacroAssembler::Ld1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kDRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1d)); +} + +void MacroAssembler::Ld1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1sb)); +} + +void MacroAssembler::Ld1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1sh)); +} + +void MacroAssembler::Ld1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ld1sw)); +} + +void MacroAssembler::St1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1b)); +} + +void MacroAssembler::St1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1h)); +} + +void MacroAssembler::St1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1w)); +} + +void MacroAssembler::St1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStore1Helper(kDRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVEStore1Fn>(&Assembler::st1d)); +} + +void MacroAssembler::Ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1b)); +} + +void MacroAssembler::Ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kHRegSizeInBytesLog2, + zt, + pg, + 
addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1h)); +} + +void MacroAssembler::Ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1w)); +} + +void MacroAssembler::Ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kDRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1d)); +} + +void MacroAssembler::Ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1sb)); +} + +void MacroAssembler::Ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1sh)); +} + +void MacroAssembler::Ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast<SVELoad1Fn>(&Assembler::ldff1sw)); +} + +void MacroAssembler::Ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqb, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqd, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqh, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ld1rqw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rqw, + 4, + 4, + NO_SVE_OFFSET_MODIFIER, + -1); +} + +void MacroAssembler::Ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1b, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1d, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1h, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1w, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::Stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) 
{ + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1b, + 4, + 0, + SVE_MUL_VL); +} +void MacroAssembler::Stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1d, + 4, + 0, + SVE_MUL_VL); +} +void MacroAssembler::Stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1h, + 4, + 0, + SVE_MUL_VL); +} +void MacroAssembler::Stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(zt, + pg, + addr, + &MacroAssembler::stnt1w, + 4, + 0, + SVE_MUL_VL); +} + +void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + if (zd.Aliases(za)) { + // zda = zda + (zn . zm) + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm, index); + + } else if (zd.Aliases(zn) || zd.Aliases(zm)) { + // zdn = za + (zdn . zm[index]) + // zdm = za + (zn . zdm[index]) + // zdnm = za + (zdnm . zdnm[index]) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm, index); + } + + Mov(zd, scratch); + } else { + // zd = za + (zn . zm) + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm, index); + } +} + +void MacroAssembler::SVESdotUdotHelper(IntArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (zd.Aliases(za)) { + // zda = zda + (zn . zm) + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm); + + } else if (zd.Aliases(zn) || zd.Aliases(zm)) { + // zdn = za + (zdn . zm) + // zdm = za + (zn . zdm) + // zdnm = za + (zdnm . zdnm) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm); + } + + Mov(zd, scratch); + } else { + // zd = za + (zn . 
zm) + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm); + } +} + +void MacroAssembler::Fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); + Mov(scratch, zm); + MovprfxHelperScope guard(this, zd, pg, zn); + fscale(zd, pg, zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + fscale(zd, pg, zd, zm); + } +} + +void MacroAssembler::Sdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotHelper(&Assembler::sdot, zd, za, zn, zm); +} + +void MacroAssembler::Sdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); +} + +void MacroAssembler::Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotHelper(&Assembler::udot, zd, za, zn, zm); +} + +void MacroAssembler::Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVESdotUdotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); +} + +void MacroAssembler::FPMulAddHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + SVEMulAddPredicatedZdaFn fn_zda, + SVEMulAddPredicatedZdnFn fn_zdn, + FPMacroNaNPropagationOption nan_option) { + ResolveFPNaNPropagationOption(&nan_option); + + if (zd.Aliases(za)) { + // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. + SingleEmissionCheckScope guard(this); + (this->*fn_zda)(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb. + SingleEmissionCheckScope guard(this); + (this->*fn_zdn)(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + switch (nan_option) { + case FastNaNPropagation: { + // We treat multiplication as commutative in the fast mode, so we can + // swap zn and zm. + // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb. + SingleEmissionCheckScope guard(this); + (this->*fn_zdn)(zd, pg, zn, za); + return; + } + case StrictNaNPropagation: { + UseScratchRegisterScope temps(this); + // Use a scratch register to keep the argument order exactly as + // specified. + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); + { + MovprfxHelperScope guard(this, scratch, pg, za); + // scratch = (-)za + ((-)zn * zm) + (this->*fn_zda)(scratch, pg, zn, zm); + } + Mov(zd, scratch); + return; + } + case NoFPMacroNaNPropagationSelected: + VIXL_UNREACHABLE(); + return; + } + } else { + // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 
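The `nan_option` argument controls how much freedom this helper has: FastNaNPropagation permits the operand commutation shown above, while StrictNaNPropagation preserves the written operand order by accumulating into a scratch register first. A sketch, assuming a MacroAssembler named `masm` and hypothetical registers:

    // zd aliases zm. In fast mode this becomes a single fmad with zn and za
    // swapped, so NaN payloads may propagate from a different operand.
    masm.Fmla(z0.VnD(), p1.Merging(), z4.VnD(), z5.VnD(), z0.VnD(), FastNaNPropagation);
    // In strict mode the helper accumulates into a scratch register (movprfx,
    // fmla, mov), so NaN propagation matches the un-aliased form exactly.
    masm.Fmla(z0.VnD(), p1.Merging(), z4.VnD(), z5.VnD(), z0.VnD(), StrictNaNPropagation);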
+ MovprfxHelperScope guard(this, zd, pg, za); + (this->*fn_zda)(zd, pg, zn, zm); + } +} + +void MacroAssembler::FPMulAddIndexHelper(SVEMulAddIndexFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + if (zd.Aliases(za)) { + // zda = zda + (zn * zm[i]) + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm, index); + + } else if (zd.Aliases(zn) || zd.Aliases(zm)) { + // zdn = za + (zdn * zm[i]) + // zdm = za + (zn * zdm[i]) + // zdnm = za + (zdnm * zdnm[i]) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm, index); + } + Mov(zd, scratch); + } else { + // zd = za + (zn * zm[i]) + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm, index); + } +} + +void MacroAssembler::Fmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fmla, + &Assembler::fmad, + nan_option); +} + +void MacroAssembler::Fmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddIndexHelper(&Assembler::fmla, zd, za, zn, zm, index); +} + +void MacroAssembler::Fmls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fmls, + &Assembler::fmsb, + nan_option); +} + +void MacroAssembler::Fmls(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddIndexHelper(&Assembler::fmls, zd, za, zn, zm, index); +} + +void MacroAssembler::Fnmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fnmla, + &Assembler::fnmad, + nan_option); +} + +void MacroAssembler::Fnmls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fnmls, + &Assembler::fnmsb, + nan_option); +} + +void MacroAssembler::Ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); + Mov(scratch, zm); + MovprfxHelperScope guard(this, zd, zn); + ftmad(zd, zd, scratch, imm3); + } else { + MovprfxHelperScope guard(this, zd, zn); + ftmad(zd, zd, zm, imm3); + } +} + +void MacroAssembler::Fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, pg, zn); + 
fcadd(scratch, pg, scratch, zm, rot); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + fcadd(zd, pg, zd, zm, rot); + } +} + +void MacroAssembler::Ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + // zd = ext(zn, zd, offset) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + ext(scratch, scratch, zm, offset); + } + Mov(zd, scratch); + } else { + // zd = ext(zn, zm, offset) + // zd = ext(zd, zd, offset) + MovprfxHelperScope guard(this, zd, zn); + ext(zd, zd, zm, offset); + } +} + +void MacroAssembler::Splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + splice(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + splice(zd, pg, zd, zm); + } +} + +void MacroAssembler::Clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + clasta(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + clasta(zd, pg, zd, zm); + } +} + +void MacroAssembler::Clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + clastb(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + clastb(zd, pg, zd, zm); + } +} + +} // namespace aarch64 +} // namespace vixl diff --git a/src/aarch64/operands-aarch64.cc b/src/aarch64/operands-aarch64.cc index 20364616..008179e4 100644 --- a/src/aarch64/operands-aarch64.cc +++ b/src/aarch64/operands-aarch64.cc @@ -30,32 +30,32 @@ namespace vixl { namespace aarch64 { // CPURegList utilities. 
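The hunk below adds an optional `mask` argument to the pop helpers: only registers present in both the list and the mask are considered, and an empty intersection returns NoCPUReg. A usage sketch, with hypothetical registers:

    CPURegList list(x2, x3, x10);
    // Restrict the pop to {x3, x10}; x2 stays in the list.
    CPURegister lo = list.PopLowestIndex(x3.GetBit() | x10.GetBit());  // pops x3
    CPURegister hi = list.PopHighestIndex(x10.GetBit());               // pops x10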
-CPURegister CPURegList::PopLowestIndex() { - if (IsEmpty()) { - return NoCPUReg; - } - int index = CountTrailingZeros(list_); - VIXL_ASSERT((1 << index) & list_); +CPURegister CPURegList::PopLowestIndex(RegList mask) { + RegList list = list_ & mask; + if (list == 0) return NoCPUReg; + int index = CountTrailingZeros(list); + VIXL_ASSERT(((1 << index) & list) != 0); Remove(index); return CPURegister(index, size_, type_); } -CPURegister CPURegList::PopHighestIndex() { - VIXL_ASSERT(IsValid()); - if (IsEmpty()) { - return NoCPUReg; - } - int index = CountLeadingZeros(list_); +CPURegister CPURegList::PopHighestIndex(RegList mask) { + RegList list = list_ & mask; + if (list == 0) return NoCPUReg; + int index = CountLeadingZeros(list); index = kRegListSizeInBits - 1 - index; - VIXL_ASSERT((1 << index) & list_); + VIXL_ASSERT(((1 << index) & list) != 0); Remove(index); return CPURegister(index, size_, type_); } bool CPURegList::IsValid() const { - if ((type_ == CPURegister::kRegister) || (type_ == CPURegister::kVRegister)) { + if (type_ == CPURegister::kNoRegister) { + // We can't use IsEmpty here because that asserts IsValid(). + return list_ == 0; + } else { bool is_valid = true; // Try to create a CPURegister for each element in the list. for (int i = 0; i < kRegListSizeInBits; i++) { @@ -64,11 +64,6 @@ bool CPURegList::IsValid() const { } } return is_valid; - } else if (type_ == CPURegister::kNoRegister) { - // We can't use IsEmpty here because that asserts IsValid(). - return list_ == 0; - } else { - return false; } } @@ -149,145 +144,6 @@ const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV(); const CPURegList kCallerSaved = CPURegList::GetCallerSaved(); const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV(); - -// Registers. -#define WREG(n) w##n, -const Register Register::wregisters[] = {AARCH64_REGISTER_CODE_LIST(WREG)}; -#undef WREG - -#define XREG(n) x##n, -const Register Register::xregisters[] = {AARCH64_REGISTER_CODE_LIST(XREG)}; -#undef XREG - -#define BREG(n) b##n, -const VRegister VRegister::bregisters[] = {AARCH64_REGISTER_CODE_LIST(BREG)}; -#undef BREG - -#define HREG(n) h##n, -const VRegister VRegister::hregisters[] = {AARCH64_REGISTER_CODE_LIST(HREG)}; -#undef HREG - -#define SREG(n) s##n, -const VRegister VRegister::sregisters[] = {AARCH64_REGISTER_CODE_LIST(SREG)}; -#undef SREG - -#define DREG(n) d##n, -const VRegister VRegister::dregisters[] = {AARCH64_REGISTER_CODE_LIST(DREG)}; -#undef DREG - -#define QREG(n) q##n, -const VRegister VRegister::qregisters[] = {AARCH64_REGISTER_CODE_LIST(QREG)}; -#undef QREG - -#define VREG(n) v##n, -const VRegister VRegister::vregisters[] = {AARCH64_REGISTER_CODE_LIST(VREG)}; -#undef VREG - - -const Register& Register::GetWRegFromCode(unsigned code) { - if (code == kSPRegInternalCode) { - return wsp; - } else { - VIXL_ASSERT(code < kNumberOfRegisters); - return wregisters[code]; - } -} - - -const Register& Register::GetXRegFromCode(unsigned code) { - if (code == kSPRegInternalCode) { - return sp; - } else { - VIXL_ASSERT(code < kNumberOfRegisters); - return xregisters[code]; - } -} - - -const VRegister& VRegister::GetBRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return bregisters[code]; -} - - -const VRegister& VRegister::GetHRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return hregisters[code]; -} - - -const VRegister& VRegister::GetSRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return sregisters[code]; -} - - -const VRegister& 
VRegister::GetDRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return dregisters[code]; -} - - -const VRegister& VRegister::GetQRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return qregisters[code]; -} - - -const VRegister& VRegister::GetVRegFromCode(unsigned code) { - VIXL_ASSERT(code < kNumberOfVRegisters); - return vregisters[code]; -} - - -const Register& CPURegister::W() const { - VIXL_ASSERT(IsValidRegister()); - return Register::GetWRegFromCode(code_); -} - - -const Register& CPURegister::X() const { - VIXL_ASSERT(IsValidRegister()); - return Register::GetXRegFromCode(code_); -} - - -const VRegister& CPURegister::B() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetBRegFromCode(code_); -} - - -const VRegister& CPURegister::H() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetHRegFromCode(code_); -} - - -const VRegister& CPURegister::S() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetSRegFromCode(code_); -} - - -const VRegister& CPURegister::D() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetDRegFromCode(code_); -} - - -const VRegister& CPURegister::Q() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetQRegFromCode(code_); -} - - -const VRegister& CPURegister::V() const { - VIXL_ASSERT(IsValidVRegister()); - return VRegister::GetVRegFromCode(code_); -} - - // Operand. Operand::Operand(int64_t immediate) : immediate_(immediate), @@ -296,6 +152,12 @@ Operand::Operand(int64_t immediate) extend_(NO_EXTEND), shift_amount_(0) {} +Operand::Operand(IntegerOperand immediate) + : immediate_(immediate.AsIntN(64)), + reg_(NoReg), + shift_(NO_SHIFT), + extend_(NO_EXTEND), + shift_amount_(0) {} Operand::Operand(Register reg, Shift shift, unsigned shift_amount) : reg_(reg), @@ -471,6 +333,24 @@ MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode) } +bool MemOperand::IsPlainRegister() const { + return IsImmediateOffset() && (GetOffset() == 0); +} + + +bool MemOperand::IsEquivalentToPlainRegister() const { + if (regoffset_.Is(NoReg)) { + // Immediate offset, pre-index or post-index. + return GetOffset() == 0; + } else if (GetRegisterOffset().IsZero()) { + // Zero register offset, pre-index or post-index. + // We can ignore shift and extend options because they all result in zero. + return true; + } + return false; +} + + bool MemOperand::IsImmediateOffset() const { return (addrmode_ == Offset) && regoffset_.Is(NoReg); } @@ -493,6 +373,62 @@ void MemOperand::AddOffset(int64_t offset) { } +bool SVEMemOperand::IsValid() const { +#ifdef VIXL_DEBUG + { + // It should not be possible for an SVEMemOperand to match multiple types. + int count = 0; + if (IsScalarPlusImmediate()) count++; + if (IsScalarPlusScalar()) count++; + if (IsScalarPlusVector()) count++; + if (IsVectorPlusImmediate()) count++; + if (IsVectorPlusVector()) count++; + VIXL_ASSERT(count <= 1); + } +#endif + + // We can't have a register _and_ an immediate offset. + if ((offset_ != 0) && (!regoffset_.IsNone())) return false; + + if (shift_amount_ != 0) { + // Only shift and extend modifiers can take a shift amount. + switch (mod_) { + case NO_SVE_OFFSET_MODIFIER: + case SVE_MUL_VL: + return false; + case SVE_LSL: + case SVE_UXTW: + case SVE_SXTW: + // Fall through. 
+ break; + } + } + + return IsScalarPlusImmediate() || IsScalarPlusScalar() || + IsScalarPlusVector() || IsVectorPlusImmediate() || + IsVectorPlusVector(); +} + + +bool SVEMemOperand::IsEquivalentToScalar() const { + if (IsScalarPlusImmediate()) { + return GetImmediateOffset() == 0; + } + if (IsScalarPlusScalar()) { + // We can ignore the shift because it will still result in zero. + return GetScalarOffset().IsZero(); + } + // Forms involving vectors are never equivalent to a single scalar. + return false; +} + +bool SVEMemOperand::IsPlainRegister() const { + if (IsScalarPlusImmediate()) { + return GetImmediateOffset() == 0; + } + return false; +} + GenericOperand::GenericOperand(const CPURegister& reg) : cpu_register_(reg), mem_op_size_(0) { if (reg.IsQ()) { diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h index bfc6b702..ad03a9ee 100644 --- a/src/aarch64/operands-aarch64.h +++ b/src/aarch64/operands-aarch64.h @@ -27,525 +27,15 @@ #ifndef VIXL_AARCH64_OPERANDS_AARCH64_H_ #define VIXL_AARCH64_OPERANDS_AARCH64_H_ +#include <sstream> +#include <string> + #include "instructions-aarch64.h" +#include "registers-aarch64.h" namespace vixl { namespace aarch64 { -typedef uint64_t RegList; -static const int kRegListSizeInBits = sizeof(RegList) * 8; - - -// Registers. - -// Some CPURegister methods can return Register or VRegister types, so we need -// to declare them in advance. -class Register; -class VRegister; - -class CPURegister { - public: - enum RegisterType { - // The kInvalid value is used to detect uninitialized static instances, - // which are always zero-initialized before any constructors are called. - kInvalid = 0, - kRegister, - kVRegister, - kNoRegister - }; - - CPURegister() : code_(0), size_(0), type_(kNoRegister) { - VIXL_ASSERT(!IsValid()); - VIXL_ASSERT(IsNone()); - } - - CPURegister(unsigned code, unsigned size, RegisterType type) - : code_(code), size_(size), type_(type) { - VIXL_ASSERT(IsValidOrNone()); - } - - unsigned GetCode() const { - VIXL_ASSERT(IsValid()); - return code_; - } - VIXL_DEPRECATED("GetCode", unsigned code() const) { return GetCode(); } - - RegisterType GetType() const { - VIXL_ASSERT(IsValidOrNone()); - return type_; - } - VIXL_DEPRECATED("GetType", RegisterType type() const) { return GetType(); } - - RegList GetBit() const { - VIXL_ASSERT(code_ < (sizeof(RegList) * 8)); - return IsValid() ? 
(static_cast<RegList>(1) << code_) : 0; - } - VIXL_DEPRECATED("GetBit", RegList Bit() const) { return GetBit(); } - - int GetSizeInBytes() const { - VIXL_ASSERT(IsValid()); - VIXL_ASSERT(size_ % 8 == 0); - return size_ / 8; - } - VIXL_DEPRECATED("GetSizeInBytes", int SizeInBytes() const) { - return GetSizeInBytes(); - } - - int GetSizeInBits() const { - VIXL_ASSERT(IsValid()); - return size_; - } - VIXL_DEPRECATED("GetSizeInBits", unsigned size() const) { - return GetSizeInBits(); - } - VIXL_DEPRECATED("GetSizeInBits", int SizeInBits() const) { - return GetSizeInBits(); - } - - bool Is8Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 8; - } - - bool Is16Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 16; - } - - bool Is32Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 32; - } - - bool Is64Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 64; - } - - bool Is128Bits() const { - VIXL_ASSERT(IsValid()); - return size_ == 128; - } - - bool IsValid() const { - if (IsValidRegister() || IsValidVRegister()) { - VIXL_ASSERT(!IsNone()); - return true; - } else { - // This assert is hit when the register has not been properly initialized. - // One cause for this can be an initialisation order fiasco. See - // https://isocpp.org/wiki/faq/ctors#static-init-order for some details. - VIXL_ASSERT(IsNone()); - return false; - } - } - - bool IsValidRegister() const { - return IsRegister() && ((size_ == kWRegSize) || (size_ == kXRegSize)) && - ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)); - } - - bool IsValidVRegister() const { - return IsVRegister() && ((size_ == kBRegSize) || (size_ == kHRegSize) || - (size_ == kSRegSize) || (size_ == kDRegSize) || - (size_ == kQRegSize)) && - (code_ < kNumberOfVRegisters); - } - - bool IsValidFPRegister() const { - return IsValidVRegister() && IsFPRegister(); - } - - bool IsNone() const { - // kNoRegister types should always have size 0 and code 0. - VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0)); - VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0)); - - return type_ == kNoRegister; - } - - bool Aliases(const CPURegister& other) const { - VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); - return (code_ == other.code_) && (type_ == other.type_); - } - - bool Is(const CPURegister& other) const { - VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone()); - return Aliases(other) && (size_ == other.size_); - } - - bool IsZero() const { - VIXL_ASSERT(IsValid()); - return IsRegister() && (code_ == kZeroRegCode); - } - - bool IsSP() const { - VIXL_ASSERT(IsValid()); - return IsRegister() && (code_ == kSPRegInternalCode); - } - - bool IsRegister() const { return type_ == kRegister; } - - bool IsVRegister() const { return type_ == kVRegister; } - - // CPURegister does not track lanes like VRegister does, so we have to assume - // that we have scalar types here. - // TODO: Encode lane information in CPURegister so that we can be consistent. - bool IsFPRegister() const { return IsH() || IsS() || IsD(); } - - bool IsW() const { return IsValidRegister() && Is32Bits(); } - bool IsX() const { return IsValidRegister() && Is64Bits(); } - - // These assertions ensure that the size and type of the register are as - // described. They do not consider the number of lanes that make up a vector. - // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD() - // does not imply Is1D() or Is8B(). - // Check the number of lanes, ie. the format of the vector, using methods such - // as Is8B(), Is1D(), etc. 
in the VRegister class. - bool IsV() const { return IsVRegister(); } - bool IsB() const { return IsV() && Is8Bits(); } - bool IsH() const { return IsV() && Is16Bits(); } - bool IsS() const { return IsV() && Is32Bits(); } - bool IsD() const { return IsV() && Is64Bits(); } - bool IsQ() const { return IsV() && Is128Bits(); } - - // Semantic type for sdot and udot instructions. - bool IsS4B() const { return IsS(); } - const VRegister& S4B() const { return S(); } - - const Register& W() const; - const Register& X() const; - const VRegister& V() const; - const VRegister& B() const; - const VRegister& H() const; - const VRegister& S() const; - const VRegister& D() const; - const VRegister& Q() const; - - bool IsSameType(const CPURegister& other) const { - return type_ == other.type_; - } - - bool IsSameSizeAndType(const CPURegister& other) const { - return (size_ == other.size_) && IsSameType(other); - } - - protected: - unsigned code_; - int size_; - RegisterType type_; - - private: - bool IsValidOrNone() const { return IsValid() || IsNone(); } -}; - - -class Register : public CPURegister { - public: - Register() : CPURegister() {} - explicit Register(const CPURegister& other) - : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()) { - VIXL_ASSERT(IsValidRegister()); - } - Register(unsigned code, unsigned size) : CPURegister(code, size, kRegister) {} - - bool IsValid() const { - VIXL_ASSERT(IsRegister() || IsNone()); - return IsValidRegister(); - } - - static const Register& GetWRegFromCode(unsigned code); - VIXL_DEPRECATED("GetWRegFromCode", - static const Register& WRegFromCode(unsigned code)) { - return GetWRegFromCode(code); - } - - static const Register& GetXRegFromCode(unsigned code); - VIXL_DEPRECATED("GetXRegFromCode", - static const Register& XRegFromCode(unsigned code)) { - return GetXRegFromCode(code); - } - - private: - static const Register wregisters[]; - static const Register xregisters[]; -}; - - -namespace internal { - -template <int size_in_bits> -class FixedSizeRegister : public Register { - public: - FixedSizeRegister() : Register() {} - explicit FixedSizeRegister(unsigned code) : Register(code, size_in_bits) { - VIXL_ASSERT(IsValidRegister()); - } - explicit FixedSizeRegister(const Register& other) - : Register(other.GetCode(), size_in_bits) { - VIXL_ASSERT(other.GetSizeInBits() == size_in_bits); - VIXL_ASSERT(IsValidRegister()); - } - explicit FixedSizeRegister(const CPURegister& other) - : Register(other.GetCode(), other.GetSizeInBits()) { - VIXL_ASSERT(other.GetType() == kRegister); - VIXL_ASSERT(other.GetSizeInBits() == size_in_bits); - VIXL_ASSERT(IsValidRegister()); - } - - bool IsValid() const { - return Register::IsValid() && (GetSizeInBits() == size_in_bits); - } -}; - -} // namespace internal - -typedef internal::FixedSizeRegister<kXRegSize> XRegister; -typedef internal::FixedSizeRegister<kWRegSize> WRegister; - - -class VRegister : public CPURegister { - public: - VRegister() : CPURegister(), lanes_(1) {} - explicit VRegister(const CPURegister& other) - : CPURegister(other.GetCode(), other.GetSizeInBits(), other.GetType()), - lanes_(1) { - VIXL_ASSERT(IsValidVRegister()); - VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); - } - VRegister(unsigned code, unsigned size, unsigned lanes = 1) - : CPURegister(code, size, kVRegister), lanes_(lanes) { - VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); - } - VRegister(unsigned code, VectorFormat format) - : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister), - 
lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) { - VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16)); - } - - bool IsValid() const { - VIXL_ASSERT(IsVRegister() || IsNone()); - return IsValidVRegister(); - } - - static const VRegister& GetBRegFromCode(unsigned code); - VIXL_DEPRECATED("GetBRegFromCode", - static const VRegister& BRegFromCode(unsigned code)) { - return GetBRegFromCode(code); - } - - static const VRegister& GetHRegFromCode(unsigned code); - VIXL_DEPRECATED("GetHRegFromCode", - static const VRegister& HRegFromCode(unsigned code)) { - return GetHRegFromCode(code); - } - - static const VRegister& GetSRegFromCode(unsigned code); - VIXL_DEPRECATED("GetSRegFromCode", - static const VRegister& SRegFromCode(unsigned code)) { - return GetSRegFromCode(code); - } - - static const VRegister& GetDRegFromCode(unsigned code); - VIXL_DEPRECATED("GetDRegFromCode", - static const VRegister& DRegFromCode(unsigned code)) { - return GetDRegFromCode(code); - } - - static const VRegister& GetQRegFromCode(unsigned code); - VIXL_DEPRECATED("GetQRegFromCode", - static const VRegister& QRegFromCode(unsigned code)) { - return GetQRegFromCode(code); - } - - static const VRegister& GetVRegFromCode(unsigned code); - VIXL_DEPRECATED("GetVRegFromCode", - static const VRegister& VRegFromCode(unsigned code)) { - return GetVRegFromCode(code); - } - - VRegister V8B() const { return VRegister(code_, kDRegSize, 8); } - VRegister V16B() const { return VRegister(code_, kQRegSize, 16); } - VRegister V2H() const { return VRegister(code_, kSRegSize, 2); } - VRegister V4H() const { return VRegister(code_, kDRegSize, 4); } - VRegister V8H() const { return VRegister(code_, kQRegSize, 8); } - VRegister V2S() const { return VRegister(code_, kDRegSize, 2); } - VRegister V4S() const { return VRegister(code_, kQRegSize, 4); } - VRegister V2D() const { return VRegister(code_, kQRegSize, 2); } - VRegister V1D() const { return VRegister(code_, kDRegSize, 1); } - - bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); } - bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); } - bool Is2H() const { return (Is32Bits() && (lanes_ == 2)); } - bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); } - bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); } - bool Is1S() const { return (Is32Bits() && (lanes_ == 1)); } - bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); } - bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); } - bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); } - bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); } - - // For consistency, we assert the number of lanes of these scalar registers, - // even though there are no vectors of equivalent total size with which they - // could alias. - bool Is1B() const { - VIXL_ASSERT(!(Is8Bits() && IsVector())); - return Is8Bits(); - } - bool Is1H() const { - VIXL_ASSERT(!(Is16Bits() && IsVector())); - return Is16Bits(); - } - - // Semantic type for sdot and udot instructions. 
- bool Is1S4B() const { return Is1S(); } - - - bool IsLaneSizeB() const { return GetLaneSizeInBits() == kBRegSize; } - bool IsLaneSizeH() const { return GetLaneSizeInBits() == kHRegSize; } - bool IsLaneSizeS() const { return GetLaneSizeInBits() == kSRegSize; } - bool IsLaneSizeD() const { return GetLaneSizeInBits() == kDRegSize; } - - int GetLanes() const { return lanes_; } - VIXL_DEPRECATED("GetLanes", int lanes() const) { return GetLanes(); } - - bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); } - bool IsValidFPRegister() const { - return IsValidVRegister() && IsFPRegister(); - } - - bool IsScalar() const { return lanes_ == 1; } - - bool IsVector() const { return lanes_ > 1; } - - bool IsSameFormat(const VRegister& other) const { - return (size_ == other.size_) && (lanes_ == other.lanes_); - } - - unsigned GetLaneSizeInBytes() const { return GetSizeInBytes() / lanes_; } - VIXL_DEPRECATED("GetLaneSizeInBytes", unsigned LaneSizeInBytes() const) { - return GetLaneSizeInBytes(); - } - - unsigned GetLaneSizeInBits() const { return GetLaneSizeInBytes() * 8; } - VIXL_DEPRECATED("GetLaneSizeInBits", unsigned LaneSizeInBits() const) { - return GetLaneSizeInBits(); - } - - private: - static const VRegister bregisters[]; - static const VRegister hregisters[]; - static const VRegister sregisters[]; - static const VRegister dregisters[]; - static const VRegister qregisters[]; - static const VRegister vregisters[]; - int lanes_; -}; - - -// No*Reg is used to indicate an unused argument, or an error case. Note that -// these all compare equal (using the Is() method). The Register and VRegister -// variants are provided for convenience. -const Register NoReg; -const VRegister NoVReg; -const CPURegister NoCPUReg; - - -#define DEFINE_REGISTERS(N) \ - const WRegister w##N(N); \ - const XRegister x##N(N); -AARCH64_REGISTER_CODE_LIST(DEFINE_REGISTERS) -#undef DEFINE_REGISTERS -const WRegister wsp(kSPRegInternalCode); -const XRegister sp(kSPRegInternalCode); - - -#define DEFINE_VREGISTERS(N) \ - const VRegister b##N(N, kBRegSize); \ - const VRegister h##N(N, kHRegSize); \ - const VRegister s##N(N, kSRegSize); \ - const VRegister d##N(N, kDRegSize); \ - const VRegister q##N(N, kQRegSize); \ - const VRegister v##N(N, kQRegSize); -AARCH64_REGISTER_CODE_LIST(DEFINE_VREGISTERS) -#undef DEFINE_VREGISTERS - - -// Register aliases. -const XRegister ip0 = x16; -const XRegister ip1 = x17; -const XRegister lr = x30; -const XRegister xzr = x31; -const WRegister wzr = w31; - - -// AreAliased returns true if any of the named registers overlap. Arguments -// set to NoReg are ignored. The system stack pointer may be specified. -bool AreAliased(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoReg, - const CPURegister& reg4 = NoReg, - const CPURegister& reg5 = NoReg, - const CPURegister& reg6 = NoReg, - const CPURegister& reg7 = NoReg, - const CPURegister& reg8 = NoReg); - - -// AreSameSizeAndType returns true if all of the specified registers have the -// same size, and are of the same type. The system stack pointer may be -// specified. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoCPUReg). 
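Given the new #include "registers-aarch64.h" at the top of this header, the register classes and predicates deleted here presumably move there rather than being dropped. For reference, a sketch of the predicate semantics described above, using the usual VIXL register aliases:

    VIXL_ASSERT(AreAliased(x0, w0));          // same code, same bank
    VIXL_ASSERT(!AreAliased(x0, x1, NoReg));  // NoReg arguments are ignored
    VIXL_ASSERT(AreSameSizeAndType(x0, x1));
    VIXL_ASSERT(!AreSameSizeAndType(x0, w1)); // sizes differ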
-bool AreSameSizeAndType(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg, - const CPURegister& reg5 = NoCPUReg, - const CPURegister& reg6 = NoCPUReg, - const CPURegister& reg7 = NoCPUReg, - const CPURegister& reg8 = NoCPUReg); - -// AreEven returns true if all of the specified registers have even register -// indices. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoCPUReg). -bool AreEven(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoReg, - const CPURegister& reg4 = NoReg, - const CPURegister& reg5 = NoReg, - const CPURegister& reg6 = NoReg, - const CPURegister& reg7 = NoReg, - const CPURegister& reg8 = NoReg); - - -// AreConsecutive returns true if all of the specified registers are -// consecutive in the register file. Arguments set to NoReg are ignored, as are -// any subsequent arguments. At least one argument (reg1) must be valid -// (not NoCPUReg). -bool AreConsecutive(const CPURegister& reg1, - const CPURegister& reg2, - const CPURegister& reg3 = NoCPUReg, - const CPURegister& reg4 = NoCPUReg); - - -// AreSameFormat returns true if all of the specified VRegisters have the same -// vector format. Arguments set to NoReg are ignored, as are any subsequent -// arguments. At least one argument (reg1) must be valid (not NoVReg). -bool AreSameFormat(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3 = NoVReg, - const VRegister& reg4 = NoVReg); - - -// AreConsecutive returns true if all of the specified VRegisters are -// consecutive in the register file. Arguments set to NoReg are ignored, as are -// any subsequent arguments. At least one argument (reg1) must be valid -// (not NoVReg). -bool AreConsecutive(const VRegister& reg1, - const VRegister& reg2, - const VRegister& reg3 = NoVReg, - const VRegister& reg4 = NoVReg); - - // Lists of registers. class CPURegList { public: @@ -580,6 +70,28 @@ class CPURegList { VIXL_ASSERT(IsValid()); } + // Construct an empty CPURegList with the specified size and type. If `size` + // is CPURegister::kUnknownSize and the register type requires a size, a valid + // but unspecified default will be picked. + static CPURegList Empty(CPURegister::RegisterType type, + unsigned size = CPURegister::kUnknownSize) { + return CPURegList(type, GetDefaultSizeFor(type, size), 0); + } + + // Construct a CPURegList with all possible registers with the specified size + // and type. If `size` is CPURegister::kUnknownSize and the register type + // requires a size, a valid but unspecified default will be picked. + static CPURegList All(CPURegister::RegisterType type, + unsigned size = CPURegister::kUnknownSize) { + unsigned number_of_registers = (CPURegister::GetMaxCodeFor(type) + 1); + RegList list = (static_cast<RegList>(1) << number_of_registers) - 1; + if (type == CPURegister::kRegister) { + // GetMaxCodeFor(kRegister) ignores SP, so explicitly include it. + list |= (static_cast<RegList>(1) << kSPRegInternalCode); + } + return CPURegList(type, GetDefaultSizeFor(type, size), list); + } + CPURegister::RegisterType GetType() const { VIXL_ASSERT(IsValid()); return type_; @@ -588,6 +100,10 @@ class CPURegList { return GetType(); } + CPURegister::RegisterBank GetBank() const { + return CPURegister::GetBankFor(GetType()); + } + // Combine another CPURegList into this one. Registers that already exist in // this list are left unchanged. 
The type and size of the registers in the // 'other' list must match those in this list. @@ -684,8 +200,11 @@ class CPURegList { // preparing registers for an AAPCS64 function call, for example. void RemoveCalleeSaved(); - CPURegister PopLowestIndex(); - CPURegister PopHighestIndex(); + // Find the register in this list that appears in `mask` with the lowest or + // highest code, remove it from the list and return it as a CPURegister. If + // the list is empty, leave it unchanged and return NoCPUReg. + CPURegister PopLowestIndex(RegList mask = ~static_cast<RegList>(0)); + CPURegister PopHighestIndex(RegList mask = ~static_cast<RegList>(0)); // AAPCS64 callee-saved registers. static CPURegList GetCalleeSaved(unsigned size = kXRegSize); @@ -704,7 +223,7 @@ class CPURegList { bool IncludesAliasOf(const CPURegister& other) const { VIXL_ASSERT(IsValid()); - return (type_ == other.GetType()) && IncludesAliasOf(other.GetCode()); + return (GetBank() == other.GetBank()) && IncludesAliasOf(other.GetCode()); } bool IncludesAliasOf(int code) const { @@ -744,6 +263,21 @@ class CPURegList { } private: + // If `size` is CPURegister::kUnknownSize and the type requires a known size, + // then return an arbitrary-but-valid size. + // + // Otherwise, the size is checked for validity and returned unchanged. + static unsigned GetDefaultSizeFor(CPURegister::RegisterType type, + unsigned size) { + if (size == CPURegister::kUnknownSize) { + if (type == CPURegister::kRegister) size = kXRegSize; + if (type == CPURegister::kVRegister) size = kQRegSize; + // All other types require kUnknownSize. + } + VIXL_ASSERT(CPURegister(0, size, type).IsValid()); + return size; + } + RegList list_; int size_; CPURegister::RegisterType type_; @@ -761,6 +295,7 @@ extern const CPURegList kCalleeSavedV; extern const CPURegList kCallerSaved; extern const CPURegList kCallerSavedV; +class IntegerOperand; // Operand. class Operand { @@ -769,7 +304,9 @@ class Operand { // where <immediate> is int64_t. // This is allowed to be an implicit constructor because Operand is // a wrapper class that doesn't normally perform any type conversion. - Operand(int64_t immediate = 0); // NOLINT(runtime/explicit) + Operand(int64_t immediate); // NOLINT(runtime/explicit) + + Operand(IntegerOperand immediate); // NOLINT(runtime/explicit) // rm, {<shift> #<shift_amount>} // where <shift> is one of {LSL, LSR, ASR, ROR}. @@ -883,6 +420,16 @@ class MemOperand { return shift_amount_; } + // True for MemOperands which represent something like [x0]. + // Currently, this will also return true for [x0, #0], because MemOperand has + // no way to distinguish the two. + bool IsPlainRegister() const; + + // True for MemOperands which represent something like [x0], or for compound + // MemOperands which are functionally equivalent, such as [x0, #0], [x0, xzr] + // or [x0, wzr, UXTW #3]. + bool IsEquivalentToPlainRegister() const; + // True for immediate-offset (but not indexed) MemOperands. bool IsImmediateOffset() const; // True for register-offset (but not indexed) MemOperands. @@ -918,6 +465,448 @@ class MemOperand { unsigned shift_amount_; }; +// SVE supports memory operands which don't make sense to the core ISA, such as +// scatter-gather forms, in which either the base or offset registers are +// vectors. This class exists to avoid complicating core-ISA code with +// SVE-specific behaviour. +// +// Note that SVE does not support any pre- or post-index modes. 
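The class that follows provides one constructor per addressing form. A construction sketch covering all five forms, assuming hypothetical registers:

    SVEMemOperand vpi(z0.VnS(), 21);            // vector-plus-immediate: [z0.s, #21]
    SVEMemOperand spi(x0, 42, SVE_MUL_VL);      // scalar-plus-immediate: [x0, #42, MUL VL]
    SVEMemOperand sps(x0, x1);                  // scalar-plus-scalar:    [x0, x1]
    SVEMemOperand spv(x0, z1.VnD(), SVE_UXTW);  // scalar-plus-vector:    [x0, z1.d, UXTW]
    SVEMemOperand vpv(z0.VnD(), z1.VnD());      // vector-plus-vector:    [z0.d, z1.d]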
+class SVEMemOperand { + public: + // "vector-plus-immediate", like [z0.s, #21] + explicit SVEMemOperand(ZRegister base, uint64_t offset = 0) + : base_(base), + regoffset_(NoReg), + offset_(RawbitsToInt64(offset)), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsVectorPlusImmediate()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-immediate", like [x0], [x0, #42] or [x0, #42, MUL_VL] + // The only supported modifiers are NO_SVE_OFFSET_MODIFIER or SVE_MUL_VL. + // + // Note that VIXL cannot currently distinguish between `SVEMemOperand(x0)` and + // `SVEMemOperand(x0, 0)`. This is only significant in scalar-plus-scalar + // instructions where xm defaults to xzr. However, users should not rely on + // `SVEMemOperand(x0, 0)` being accepted in such cases. + explicit SVEMemOperand(Register base, + uint64_t offset = 0, + SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER) + : base_(base), + regoffset_(NoReg), + offset_(RawbitsToInt64(offset)), + mod_(mod), + shift_amount_(0) { + VIXL_ASSERT(IsScalarPlusImmediate()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-scalar", like [x0, x1] + // "scalar-plus-vector", like [x0, z1.d] + SVEMemOperand(Register base, CPURegister offset) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsScalarPlusScalar() || IsScalarPlusVector()); + if (offset.IsZero()) VIXL_ASSERT(IsEquivalentToScalar()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-vector", like [x0, z1.d, UXTW] + // The type of `mod` can be any `SVEOffsetModifier` (other than LSL), or a + // corresponding `Extend` value. + template <typename M> + SVEMemOperand(Register base, ZRegister offset, M mod) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(0) { + VIXL_ASSERT(mod_ != SVE_LSL); // LSL requires an explicit shift amount. + VIXL_ASSERT(IsScalarPlusVector()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-scalar", like [x0, x1, LSL #1] + // "scalar-plus-vector", like [x0, z1.d, LSL #2] + // The type of `mod` can be any `SVEOffsetModifier`, or a corresponding + // `Shift` or `Extend` value. + template <typename M> + SVEMemOperand(Register base, CPURegister offset, M mod, unsigned shift_amount) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(shift_amount) { + VIXL_ASSERT(IsValid()); + } + + // "vector-plus-vector", like [z0.d, z1.d, UXTW] + template <typename M = SVEOffsetModifier> + SVEMemOperand(ZRegister base, + ZRegister offset, + M mod = NO_SVE_OFFSET_MODIFIER, + unsigned shift_amount = 0) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(shift_amount) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(IsVectorPlusVector()); + } + + // True for SVEMemOperands which represent something like [x0]. + // This will also return true for [x0, #0], because there is no way + // to distinguish the two. + bool IsPlainScalar() const { + return IsScalarPlusImmediate() && (offset_ == 0); + } + + // True for SVEMemOperands which represent something like [x0], or for + // compound SVEMemOperands which are functionally equivalent, such as + // [x0, #0], [x0, xzr] or [x0, wzr, UXTW #3]. + bool IsEquivalentToScalar() const; + + // True for SVEMemOperands like [x0], [x0, #0], false for [x0, xzr] and + // similar. 
+  bool IsPlainRegister() const;
+
+  bool IsScalarPlusImmediate() const {
+    return base_.IsX() && regoffset_.IsNone() &&
+           ((mod_ == NO_SVE_OFFSET_MODIFIER) || IsMulVl());
+  }
+
+  bool IsScalarPlusScalar() const {
+    // SVE offers no extend modes for scalar-plus-scalar, so both registers must
+    // be X registers.
+    return base_.IsX() && regoffset_.IsX() &&
+           ((mod_ == NO_SVE_OFFSET_MODIFIER) || (mod_ == SVE_LSL));
+  }
+
+  bool IsScalarPlusVector() const {
+    // The modifier can be LSL or an extend mode (UXTW or SXTW) here. Unlike
+    // in the core ISA, these extend modes do not imply an S-sized lane, so the
+    // modifier is independent of the lane size. The architecture describes
+    // [US]XTW with a D-sized lane as an "unpacked" offset.
+    return base_.IsX() && regoffset_.IsZRegister() &&
+           (regoffset_.IsLaneSizeS() || regoffset_.IsLaneSizeD()) && !IsMulVl();
+  }
+
+  bool IsVectorPlusImmediate() const {
+    return base_.IsZRegister() &&
+           (base_.IsLaneSizeS() || base_.IsLaneSizeD()) &&
+           regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER);
+  }
+
+  bool IsVectorPlusVector() const {
+    return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) &&
+           AreSameFormat(base_, regoffset_) &&
+           (base_.IsLaneSizeS() || base_.IsLaneSizeD());
+  }
+
+  bool IsContiguous() const { return !IsScatterGather(); }
+  bool IsScatterGather() const {
+    return base_.IsZRegister() || regoffset_.IsZRegister();
+  }
+
+  // TODO: If necessary, add helpers like `HasScalarBase()`.
+
+  Register GetScalarBase() const {
+    VIXL_ASSERT(base_.IsX());
+    return Register(base_);
+  }
+
+  ZRegister GetVectorBase() const {
+    VIXL_ASSERT(base_.IsZRegister());
+    VIXL_ASSERT(base_.HasLaneSize());
+    return ZRegister(base_);
+  }
+
+  Register GetScalarOffset() const {
+    VIXL_ASSERT(regoffset_.IsRegister());
+    return Register(regoffset_);
+  }
+
+  ZRegister GetVectorOffset() const {
+    VIXL_ASSERT(regoffset_.IsZRegister());
+    VIXL_ASSERT(regoffset_.HasLaneSize());
+    return ZRegister(regoffset_);
+  }
+
+  int64_t GetImmediateOffset() const {
+    VIXL_ASSERT(regoffset_.IsNone());
+    return offset_;
+  }
+
+  SVEOffsetModifier GetOffsetModifier() const { return mod_; }
+  unsigned GetShiftAmount() const { return shift_amount_; }
+
+  bool IsEquivalentToLSL(unsigned amount) const {
+    if (shift_amount_ != amount) return false;
+    if (amount == 0) {
+      // No-shift is equivalent to "LSL #0".
+      return ((mod_ == SVE_LSL) || (mod_ == NO_SVE_OFFSET_MODIFIER));
+    }
+    return mod_ == SVE_LSL;
+  }
+
+  bool IsMulVl() const { return mod_ == SVE_MUL_VL; }
+
+  bool IsValid() const;
+
+ private:
+  // Allow standard `Shift` and `Extend` arguments to be used.
+  SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) {
+    if (shift == LSL) return SVE_LSL;
+    if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER;
+    // SVE does not accept any other shift.
+    VIXL_UNIMPLEMENTED();
+    return NO_SVE_OFFSET_MODIFIER;
+  }
+
+  SVEOffsetModifier GetSVEOffsetModifierFor(Extend extend = NO_EXTEND) {
+    if (extend == UXTW) return SVE_UXTW;
+    if (extend == SXTW) return SVE_SXTW;
+    if (extend == NO_EXTEND) return NO_SVE_OFFSET_MODIFIER;
+    // SVE does not accept any other extend mode.
+    VIXL_UNIMPLEMENTED();
+    return NO_SVE_OFFSET_MODIFIER;
+  }
+
+  SVEOffsetModifier GetSVEOffsetModifierFor(SVEOffsetModifier mod) {
+    return mod;
+  }
+
+  CPURegister base_;
+  CPURegister regoffset_;
+  int64_t offset_;
+  SVEOffsetModifier mod_;
+  unsigned shift_amount_;
+};
+
+// Represent a signed or unsigned integer operand.
+// +// This is designed to make instructions which naturally accept a _signed_ +// immediate easier to implement and use, when we also want users to be able to +// specify raw-bits values (such as with hexadecimal constants). The advantage +// of this class over a simple uint64_t (with implicit C++ sign-extension) is +// that this class can strictly check the range of allowed values. With a simple +// uint64_t, it is impossible to distinguish -1 from UINT64_MAX. +// +// For example, these instructions are equivalent: +// +// __ Insr(z0.VnB(), -1); +// __ Insr(z0.VnB(), 0xff); +// +// ... as are these: +// +// __ Insr(z0.VnD(), -1); +// __ Insr(z0.VnD(), 0xffffffffffffffff); +// +// ... but this is invalid: +// +// __ Insr(z0.VnB(), 0xffffffffffffffff); // Too big for B-sized lanes. +class IntegerOperand { + public: +#define VIXL_INT_TYPES(V) \ + V(char) V(short) V(int) V(long) V(long long) // NOLINT(runtime/int) +#define VIXL_DECL_INT_OVERLOADS(T) \ + /* These are allowed to be implicit constructors because this is a */ \ + /* wrapper class that doesn't normally perform any type conversion. */ \ + IntegerOperand(signed T immediate) /* NOLINT(runtime/explicit) */ \ + : raw_bits_(immediate), /* Allow implicit sign-extension. */ \ + is_negative_(immediate < 0) {} \ + IntegerOperand(unsigned T immediate) /* NOLINT(runtime/explicit) */ \ + : raw_bits_(immediate), is_negative_(false) {} + VIXL_INT_TYPES(VIXL_DECL_INT_OVERLOADS) +#undef VIXL_DECL_INT_OVERLOADS +#undef VIXL_INT_TYPES + + // TODO: `Operand` can currently only hold an int64_t, so some large, unsigned + // values will be misrepresented here. + explicit IntegerOperand(const Operand& operand) + : raw_bits_(operand.GetEquivalentImmediate()), + is_negative_(operand.GetEquivalentImmediate() < 0) {} + + bool IsIntN(unsigned n) const { + return is_negative_ ? vixl::IsIntN(n, RawbitsToInt64(raw_bits_)) + : vixl::IsIntN(n, raw_bits_); + } + bool IsUintN(unsigned n) const { + return !is_negative_ && vixl::IsUintN(n, raw_bits_); + } + + bool IsUint8() const { return IsUintN(8); } + bool IsUint16() const { return IsUintN(16); } + bool IsUint32() const { return IsUintN(32); } + bool IsUint64() const { return IsUintN(64); } + + bool IsInt8() const { return IsIntN(8); } + bool IsInt16() const { return IsIntN(16); } + bool IsInt32() const { return IsIntN(32); } + bool IsInt64() const { return IsIntN(64); } + + bool FitsInBits(unsigned n) const { + return is_negative_ ? IsIntN(n) : IsUintN(n); + } + bool FitsInLane(const CPURegister& zd) const { + return FitsInBits(zd.GetLaneSizeInBits()); + } + bool FitsInSignedLane(const CPURegister& zd) const { + return IsIntN(zd.GetLaneSizeInBits()); + } + bool FitsInUnsignedLane(const CPURegister& zd) const { + return IsUintN(zd.GetLaneSizeInBits()); + } + + // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to an unsigned integer + // in the range [0, UINT<n>_MAX] (using two's complement mapping). + uint64_t AsUintN(unsigned n) const { + VIXL_ASSERT(FitsInBits(n)); + return raw_bits_ & GetUintMask(n); + } + + uint8_t AsUint8() const { return static_cast<uint8_t>(AsUintN(8)); } + uint16_t AsUint16() const { return static_cast<uint16_t>(AsUintN(16)); } + uint32_t AsUint32() const { return static_cast<uint32_t>(AsUintN(32)); } + uint64_t AsUint64() const { return AsUintN(64); } + + // Cast a value in the range [INT<n>_MIN, UINT<n>_MAX] to a signed integer in + // the range [INT<n>_MIN, INT<n>_MAX] (using two's complement mapping). 
+  int64_t AsIntN(unsigned n) const {
+    VIXL_ASSERT(FitsInBits(n));
+    return ExtractSignedBitfield64(n - 1, 0, raw_bits_);
+  }
+
+  int8_t AsInt8() const { return static_cast<int8_t>(AsIntN(8)); }
+  int16_t AsInt16() const { return static_cast<int16_t>(AsIntN(16)); }
+  int32_t AsInt32() const { return static_cast<int32_t>(AsIntN(32)); }
+  int64_t AsInt64() const { return AsIntN(64); }
+
+  // Several instructions encode a signed int<N>_t, which is then (optionally)
+  // left-shifted and sign-extended to a Z register lane with a size which may
+  // be larger than N. This helper tries to find an int<N>_t such that the
+  // IntegerOperand's arithmetic value is reproduced in each lane.
+  //
+  // This is the mechanism that allows `Insr(z0.VnB(), 0xff)` to be treated as
+  // `Insr(z0.VnB(), -1)`.
+  template <unsigned N, unsigned kShift, typename T>
+  bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, T* imm) const {
+    VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
+    VIXL_ASSERT(FitsInLane(zd));
+    if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
+
+    // Reverse the specified left-shift.
+    IntegerOperand unshifted(*this);
+    unshifted.ArithmeticShiftRight(kShift);
+
+    if (unshifted.IsIntN(N)) {
+      // This is trivial, since sign-extension produces the same arithmetic
+      // value irrespective of the destination size.
+      *imm = static_cast<T>(unshifted.AsIntN(N));
+      return true;
+    }
+
+    // Otherwise, we might be able to use the sign-extension to produce the
+    // desired bit pattern. We can only do this for values in the range
+    // [INT<N>_MAX + 1, UINT<N>_MAX], where the highest set bit is the sign bit.
+    //
+    // The lane size has to be adjusted to compensate for `kShift`, since the
+    // high bits will be dropped when the encoded value is left-shifted.
+    if (unshifted.IsUintN(zd.GetLaneSizeInBits() - kShift)) {
+      int64_t encoded = unshifted.AsIntN(zd.GetLaneSizeInBits() - kShift);
+      if (vixl::IsIntN(N, encoded)) {
+        *imm = static_cast<T>(encoded);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // As above, but `kShift` is written to the `*shift` parameter on success, so
+  // that it is easy to chain calls like this:
+  //
+  //     if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
+  //         imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
+  //       insn(zd, imm8, shift);
+  //     }
+  template <unsigned N, unsigned kShift, typename T, typename S>
+  bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd,
+                                     T* imm,
+                                     S* shift) const {
+    if (TryEncodeAsShiftedIntNForLane<N, kShift>(zd, imm)) {
+      *shift = kShift;
+      return true;
+    }
+    return false;
+  }
+
+  // As above, but assume that `kShift` is 0.
+  template <unsigned N, typename T>
+  bool TryEncodeAsIntNForLane(const CPURegister& zd, T* imm) const {
+    return TryEncodeAsShiftedIntNForLane<N, 0>(zd, imm);
+  }
+
+  // As above, but for unsigned fields. This is usually a simple operation, but
+  // is provided for symmetry.
+  template <unsigned N, unsigned kShift, typename T>
+  bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, T* imm) const {
+    VIXL_STATIC_ASSERT(std::numeric_limits<T>::digits > N);
+    VIXL_ASSERT(FitsInLane(zd));
+
+    // TODO: Should we convert -1 to 0xff here?
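+    // For now, negative values are rejected outright: an unsigned field has
+    // no sign bit, so the sign-extension trick used by the signed helpers
+    // above does not apply.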
+    if (is_negative_) return false;
+    USE(zd);
+
+    if ((raw_bits_ & GetUintMask(kShift)) != 0) return false;
+
+    if (vixl::IsUintN(N, raw_bits_ >> kShift)) {
+      *imm = static_cast<T>(raw_bits_ >> kShift);
+      return true;
+    }
+    return false;
+  }
+
+  template <unsigned N, unsigned kShift, typename T, typename S>
+  bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd,
+                                      T* imm,
+                                      S* shift) const {
+    if (TryEncodeAsShiftedUintNForLane<N, kShift>(zd, imm)) {
+      *shift = kShift;
+      return true;
+    }
+    return false;
+  }
+
+  bool IsZero() const { return raw_bits_ == 0; }
+  bool IsNegative() const { return is_negative_; }
+  bool IsPositiveOrZero() const { return !is_negative_; }
+
+  uint64_t GetMagnitude() const {
+    return is_negative_ ? -raw_bits_ : raw_bits_;
+  }
+
+ private:
+  // Shift the arithmetic value right, with sign extension if is_negative_.
+  void ArithmeticShiftRight(int shift) {
+    VIXL_ASSERT((shift >= 0) && (shift < 64));
+    if (shift == 0) return;
+    if (is_negative_) {
+      raw_bits_ = ExtractSignedBitfield64(63, shift, raw_bits_);
+    } else {
+      raw_bits_ >>= shift;
+    }
+  }
+
+  uint64_t raw_bits_;
+  bool is_negative_;
+};
+
 // This is an abstraction that can represent a register or memory location. The
 // `MacroAssembler` provides helpers to move data between generic operands.
 class GenericOperand {
diff --git a/src/aarch64/registers-aarch64.cc b/src/aarch64/registers-aarch64.cc
new file mode 100644
index 00000000..735f43c7
--- /dev/null
+++ b/src/aarch64/registers-aarch64.cc
@@ -0,0 +1,321 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright
+//     notice, this list of conditions and the following disclaimer in the
+//     documentation and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may
+//     be used to endorse or promote products derived from this software
+//     without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#include <sstream>
+#include <string>
+
+#include "registers-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+std::string CPURegister::GetArchitecturalName() const {
+  std::ostringstream name;
+  if (IsZRegister()) {
+    name << 'z' << GetCode();
+    if (HasLaneSize()) {
+      name << '.' << GetLaneSizeSymbol();
+    }
+  } else if (IsPRegister()) {
+    name << 'p' << GetCode();
+    if (HasLaneSize()) {
+      name << '.' << GetLaneSizeSymbol();
+    }
+    switch (qualifiers_) {
+      case kNoQualifiers:
+        break;
+      case kMerging:
+        name << "/m";
+        break;
+      case kZeroing:
+        name << "/z";
+        break;
+    }
+  } else {
+    VIXL_UNIMPLEMENTED();
+  }
+  return name.str();
+}
+
+unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) {
+  switch (bank) {
+    case kNoRegisterBank:
+      return 0;
+    case kRRegisterBank:
+      return Register::GetMaxCode();
+    case kVRegisterBank:
+#ifdef VIXL_HAS_CONSTEXPR
+      VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#else
+      VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode());
+#endif
+      return VRegister::GetMaxCode();
+    case kPRegisterBank:
+      return PRegister::GetMaxCode();
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+bool CPURegister::IsValidRegister() const {
+  return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) &&
+         (bank_ == kRRegisterBank) &&
+         ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) &&
+         (qualifiers_ == kNoQualifiers) && (lane_size_ == size_);
+}
+
+bool CPURegister::IsValidVRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) &&
+         ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) &&
+         (qualifiers_ == kNoQualifiers) &&
+         (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_);
+}
+
+bool CPURegister::IsValidFPRegister() const {
+  return IsValidVRegister() && IsFPRegister();
+}
+
+bool CPURegister::IsValidZRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  // Z registers are valid with or without a lane size, so we don't need to
+  // check lane_size_.
+  return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) &&
+         (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers);
+}
+
+bool CPURegister::IsValidPRegister() const {
+  VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize);
+  // P registers are valid with or without a lane size, so we don't need to
+  // check lane_size_.
+  return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) &&
+         (size_ == kEncodedUnknownSize) &&
+         ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) ||
+          (qualifiers_ == kZeroing));
+}
+
+bool CPURegister::IsValid() const {
+  return IsValidRegister() || IsValidVRegister() || IsValidZRegister() ||
+         IsValidPRegister();
+}
+
+// Most coercions simply invoke the necessary constructor.
+#define VIXL_CPUREG_COERCION_LIST(U) \
+  U(Register, W, R)                  \
+  U(Register, X, R)                  \
+  U(VRegister, B, V)                 \
+  U(VRegister, H, V)                 \
+  U(VRegister, S, V)                 \
+  U(VRegister, D, V)                 \
+  U(VRegister, Q, V)                 \
+  U(VRegister, V, V)                 \
+  U(ZRegister, Z, V)                 \
+  U(PRegister, P, P)
+#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \
+  RET_TYPE CPURegister::CTOR_TYPE() const {                    \
+    VIXL_ASSERT(GetBank() == k##BANK##RegisterBank);           \
+    return CTOR_TYPE##Register(GetCode());                     \
+  }
+VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION)
+#undef VIXL_CPUREG_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_COERCION
+
+// NEON lane-format coercions always return VRegisters.
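+// For example, the V(8, B) entry in the list below expands to the equivalent
+// of:
+//
+//   VRegister VRegister::V8B() const {
+//     VIXL_ASSERT(IsVRegister());
+//     return VRegister(GetCode(), 8 * kBRegSize, 8);  // 64 bits, 8 lanes.
+//   }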
+#define VIXL_CPUREG_NEON_COERCION_LIST(V) \
+  V(8, B)                                 \
+  V(16, B)                                \
+  V(2, H)                                 \
+  V(4, H)                                 \
+  V(8, H)                                 \
+  V(2, S)                                 \
+  V(4, S)                                 \
+  V(1, D)                                 \
+  V(2, D)
+#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE)             \
+  VRegister VRegister::V##LANES##LANE_TYPE() const {                   \
+    VIXL_ASSERT(IsVRegister());                                        \
+    return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \
+  }
+VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION)
+#undef VIXL_CPUREG_NEON_COERCION_LIST
+#undef VIXL_DEFINE_CPUREG_NEON_COERCION
+
+// Semantic type coercion for sdot and udot.
+// TODO: Use the qualifiers_ field to distinguish this from ::S().
+VRegister VRegister::S4B() const {
+  VIXL_ASSERT(IsVRegister());
+  return SRegister(GetCode());
+}
+
+bool AreAliased(const CPURegister& reg1,
+                const CPURegister& reg2,
+                const CPURegister& reg3,
+                const CPURegister& reg4,
+                const CPURegister& reg5,
+                const CPURegister& reg6,
+                const CPURegister& reg7,
+                const CPURegister& reg8) {
+  int number_of_valid_regs = 0;
+  int number_of_valid_vregs = 0;
+  int number_of_valid_pregs = 0;
+
+  RegList unique_regs = 0;
+  RegList unique_vregs = 0;
+  RegList unique_pregs = 0;
+
+  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
+
+  for (size_t i = 0; i < ArrayLength(regs); i++) {
+    switch (regs[i].GetBank()) {
+      case CPURegister::kRRegisterBank:
+        number_of_valid_regs++;
+        unique_regs |= regs[i].GetBit();
+        break;
+      case CPURegister::kVRegisterBank:
+        number_of_valid_vregs++;
+        unique_vregs |= regs[i].GetBit();
+        break;
+      case CPURegister::kPRegisterBank:
+        number_of_valid_pregs++;
+        unique_pregs |= regs[i].GetBit();
+        break;
+      case CPURegister::kNoRegisterBank:
+        VIXL_ASSERT(regs[i].IsNone());
+        break;
+    }
+  }
+
+  int number_of_unique_regs = CountSetBits(unique_regs);
+  int number_of_unique_vregs = CountSetBits(unique_vregs);
+  int number_of_unique_pregs = CountSetBits(unique_pregs);
+
+  VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs);
+  VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs);
+  VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs);
+
+  return (number_of_valid_regs != number_of_unique_regs) ||
+         (number_of_valid_vregs != number_of_unique_vregs) ||
+         (number_of_valid_pregs != number_of_unique_pregs);
+}
+
+bool AreSameSizeAndType(const CPURegister& reg1,
+                        const CPURegister& reg2,
+                        const CPURegister& reg3,
+                        const CPURegister& reg4,
+                        const CPURegister& reg5,
+                        const CPURegister& reg6,
+                        const CPURegister& reg7,
+                        const CPURegister& reg8) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1);
+  match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1);
+  match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1);
+  match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1);
+  match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1);
+  match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1);
+  match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1);
+  return match;
+}
+
+bool AreEven(const CPURegister& reg1,
+             const CPURegister& reg2,
+             const CPURegister& reg3,
+             const CPURegister& reg4,
+             const CPURegister& reg5,
+             const CPURegister& reg6,
+             const CPURegister& reg7,
+             const CPURegister& reg8) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool even = (reg1.GetCode() % 2) == 0;
+  even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0);
+  even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0);
+  even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0);
+  even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0);
+  even &= !reg6.IsValid() || ((reg6.GetCode() % 2) == 0);
+  even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0);
+  even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0);
+  return even;
+}
+
+bool AreConsecutive(const CPURegister& reg1,
+                    const CPURegister& reg2,
+                    const CPURegister& reg3,
+                    const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+
+  if (!reg2.IsValid()) {
+    return true;
+  } else if (reg2.GetCode() !=
+             ((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  if (!reg3.IsValid()) {
+    return true;
+  } else if (reg3.GetCode() !=
+             ((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  if (!reg4.IsValid()) {
+    return true;
+  } else if (reg4.GetCode() !=
+             ((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) {
+    return false;
+  }
+
+  return true;
+}
+
+bool AreSameFormat(const CPURegister& reg1,
+                   const CPURegister& reg2,
+                   const CPURegister& reg3,
+                   const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &= !reg2.IsValid() || reg2.IsSameFormat(reg1);
+  match &= !reg3.IsValid() || reg3.IsSameFormat(reg1);
+  match &= !reg4.IsValid() || reg4.IsSameFormat(reg1);
+  return match;
+}
+
+bool AreSameLaneSize(const CPURegister& reg1,
+                     const CPURegister& reg2,
+                     const CPURegister& reg3,
+                     const CPURegister& reg4) {
+  VIXL_ASSERT(reg1.IsValid());
+  bool match = true;
+  match &=
+      !reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  match &=
+      !reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  match &=
+      !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits());
+  return match;
+}
+}
+}  // namespace vixl::aarch64
diff --git a/src/aarch64/registers-aarch64.h b/src/aarch64/registers-aarch64.h
new file mode 100644
index 00000000..911974a8
--- /dev/null
+++ b/src/aarch64/registers-aarch64.h
@@ -0,0 +1,900 @@
+// Copyright 2019, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright
+//     notice, this list of conditions and the following disclaimer in the
+//     documentation and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may
+//     be used to endorse or promote products derived from this software
+//     without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_
+#define VIXL_AARCH64_REGISTERS_AARCH64_H_
+
+#include <string>
+
+#include "instructions-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+// An integer type capable of representing a homogeneous, non-overlapping set
+// of registers as a bitmask of their codes.
+typedef uint64_t RegList;
+static const int kRegListSizeInBits = sizeof(RegList) * 8;
+
+class Register;
+class WRegister;
+class XRegister;
+
+class VRegister;
+class BRegister;
+class HRegister;
+class SRegister;
+class DRegister;
+class QRegister;
+
+class ZRegister;
+
+class PRegister;
+class PRegisterWithLaneSize;
+class PRegisterM;
+class PRegisterZ;
+
+// A container for any single register supported by the processor. Selected
+// qualifications are also supported. Basic registers can be constructed
+// directly as CPURegister objects. Other variants should be constructed as one
+// of the derived classes.
+//
+// CPURegister aims to support any getter that would also be available to more
+// specialised register types. However, using the equivalent functions on the
+// specialised register types can avoid run-time checks, and should therefore
+// be preferred where run-time polymorphism isn't required.
+//
+// Type-specific modifiers are typically implemented only on the derived
+// classes.
+//
+// The encoding is such that CPURegister objects are cheap to pass by value.
+class CPURegister {
+ public:
+  enum RegisterBank : uint8_t {
+    kNoRegisterBank = 0,
+    kRRegisterBank,
+    kVRegisterBank,
+    kPRegisterBank
+  };
+  enum RegisterType {
+    kNoRegister,
+    kRegister,
+    kVRegister,
+    kZRegister,
+    kPRegister
+  };
+
+  static const unsigned kUnknownSize = 0;
+
+  VIXL_CONSTEXPR CPURegister()
+      : code_(0),
+        bank_(kNoRegisterBank),
+        size_(kEncodedUnknownSize),
+        qualifiers_(kNoQualifiers),
+        lane_size_(kEncodedUnknownSize) {}
+
+  CPURegister(int code, int size_in_bits, RegisterType type)
+      : code_(code),
+        bank_(GetBankFor(type)),
+        size_(EncodeSizeInBits(size_in_bits)),
+        qualifiers_(kNoQualifiers),
+        lane_size_(EncodeSizeInBits(size_in_bits)) {
+    VIXL_ASSERT(IsValid());
+  }
+
+  // Basic accessors.
+
+  // TODO: Make this return 'int'.
+  unsigned GetCode() const { return code_; }
+
+  RegisterBank GetBank() const { return bank_; }
+
+  // For scalar registers, the lane size matches the register size, and is
+  // always known.
+  bool HasSize() const { return size_ != kEncodedUnknownSize; }
+  bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; }
+
+  RegList GetBit() const {
+    if (IsNone()) return 0;
+    VIXL_ASSERT(code_ < kRegListSizeInBits);
+    return static_cast<RegList>(1) << code_;
+  }
+
+  // Return the architectural name for this register.
+  // TODO: This is temporary. Ultimately, we should move the
+  // Simulator::*RegNameForCode helpers out of the simulator, and provide an
+  // independent way to obtain the name of a register.
+  std::string GetArchitecturalName() const;
+
+  // Return the highest valid register code for this type, to allow generic
+  // loops to be written. This excludes kSPRegInternalCode, since it is not
+  // contiguous, and sp usually requires special handling anyway.
+  unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); }
+
+  // Registers without a known size report kUnknownSize.
+  int GetSizeInBits() const { return DecodeSizeInBits(size_); }
+  int GetSizeInBytes() const { return DecodeSizeInBytes(size_); }
+  // TODO: Make these return 'int'.
+ unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); } + unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); } + unsigned GetLaneSizeInBytesLog2() const { + VIXL_ASSERT(HasLaneSize()); + return DecodeSizeInBytesLog2(lane_size_); + } + + int GetLanes() const { + if (HasSize() && HasLaneSize()) { + // Take advantage of the size encoding to calculate this efficiently. + VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1)); + int log2_delta = static_cast<int>(size_) - static_cast<int>(lane_size_); + VIXL_ASSERT(log2_delta >= 0); + return 1 << log2_delta; + } + return kUnknownSize; + } + + bool Is8Bits() const { return size_ == kEncodedBRegSize; } + bool Is16Bits() const { return size_ == kEncodedHRegSize; } + bool Is32Bits() const { return size_ == kEncodedSRegSize; } + bool Is64Bits() const { return size_ == kEncodedDRegSize; } + bool Is128Bits() const { return size_ == kEncodedQRegSize; } + + bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; } + bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; } + bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; } + bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; } + bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; } + + // If Is<Foo>Register(), then it is valid to convert the CPURegister to some + // <Foo>Register<Bar> type. + // + // If... ... then it is safe to construct ... + // r.IsRegister() -> Register(r) + // r.IsVRegister() -> VRegister(r) + // r.IsZRegister() -> ZRegister(r) + // r.IsPRegister() -> PRegister(r) + // + // r.IsPRegister() && HasLaneSize() -> PRegisterWithLaneSize(r) + // r.IsPRegister() && IsMerging() -> PRegisterM(r) + // r.IsPRegister() && IsZeroing() -> PRegisterZ(r) + bool IsRegister() const { return GetType() == kRegister; } + bool IsVRegister() const { return GetType() == kVRegister; } + bool IsZRegister() const { return GetType() == kZRegister; } + bool IsPRegister() const { return GetType() == kPRegister; } + + bool IsNone() const { return GetType() == kNoRegister; } + + // `GetType() == kNoRegister` implies IsNone(), and vice-versa. + // `GetType() == k<Foo>Register` implies Is<Foo>Register(), and vice-versa. + RegisterType GetType() const { + switch (bank_) { + case kNoRegisterBank: + return kNoRegister; + case kRRegisterBank: + return kRegister; + case kVRegisterBank: + return HasSize() ? kVRegister : kZRegister; + case kPRegisterBank: + return kPRegister; + } + VIXL_UNREACHABLE(); + return kNoRegister; + } + + // IsFPRegister() is true for scalar FP types (and therefore implies + // IsVRegister()). There is no corresponding FPRegister type. + bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); } + + // TODO: These are stricter forms of the helpers above. We should make the + // basic helpers strict, and remove these. 
+  bool IsValidRegister() const;
+  bool IsValidVRegister() const;
+  bool IsValidFPRegister() const;
+  bool IsValidZRegister() const;
+  bool IsValidPRegister() const;
+
+  bool IsValid() const;
+  bool IsValidOrNone() const { return IsNone() || IsValid(); }
+
+  bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); }
+  bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); }
+
+  bool IsSameType(const CPURegister& other) const {
+    return GetType() == other.GetType();
+  }
+
+  bool IsSameBank(const CPURegister& other) const {
+    return GetBank() == other.GetBank();
+  }
+
+  // Two registers with unknown size are considered to have the same size if
+  // they also have the same type. For example, all Z registers have the same
+  // size, even though we don't know what that is.
+  bool IsSameSizeAndType(const CPURegister& other) const {
+    return IsSameType(other) && (size_ == other.size_);
+  }
+
+  bool IsSameFormat(const CPURegister& other) const {
+    return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_);
+  }
+
+  // Note that NoReg aliases itself, so that 'Is' implies 'Aliases'.
+  bool Aliases(const CPURegister& other) const {
+    return IsSameBank(other) && (code_ == other.code_);
+  }
+
+  bool Is(const CPURegister& other) const {
+    if (IsRegister() || IsVRegister()) {
+      // For core (W, X) and FP/NEON registers, we only consider the code, size
+      // and type. This is legacy behaviour.
+      // TODO: We should probably check every field for all registers.
+      return Aliases(other) && (size_ == other.size_);
+    } else {
+      // For Z and P registers, we require all fields to match exactly.
+      VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister());
+      return (code_ == other.code_) && (bank_ == other.bank_) &&
+             (size_ == other.size_) && (qualifiers_ == other.qualifiers_) &&
+             (lane_size_ == other.lane_size_);
+    }
+  }
+
+  // Conversions to specific register types. The result is a register that
+  // aliases the original CPURegister. That is, the original register bank
+  // (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all
+  // other properties are ignored.
+  //
+  // Typical usage:
+  //
+  //     if (reg.GetBank() == kVRegisterBank) {
+  //       DRegister d = reg.D();
+  //       ...
+  //     }
+  //
+  // These could all return types with compile-time guarantees (like XRegister),
+  // but this breaks backwards-compatibility quite severely, particularly with
+  // code like `cond ? reg.W() : reg.X()`, which would have indeterminate type.
+
+  // Core registers, like "w0".
+  Register W() const;
+  Register X() const;
+  // FP/NEON registers, like "b0".
+  VRegister B() const;
+  VRegister H() const;
+  VRegister S() const;
+  VRegister D() const;
+  VRegister Q() const;
+  VRegister V() const;
+  // SVE registers, like "z0".
+  ZRegister Z() const;
+  PRegister P() const;
+
+  // Utilities for kRegister types.
+
+  bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); }
+  bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); }
+  bool IsW() const { return IsRegister() && Is32Bits(); }
+  bool IsX() const { return IsRegister() && Is64Bits(); }
+
+  // Utilities for FP/NEON kVRegister types.
+
+  // These helpers ensure that the size and type of the register are as
+  // described. They do not consider the number of lanes that make up a vector.
+  // So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but IsD()
+  // does not imply Is1D() or Is8B().
+  // Check the number of lanes, i.e. the format of the vector, using methods
+  // such as Is8B(), Is1D(), etc.
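+  //
+  // For example, for register code 0 in the V bank:
+  //  - q0 (the 1Q scalar form of v0) satisfies IsQ() and Is1Q(), but not
+  //    Is16B().
+  //  - v0.V16B() satisfies IsQ() and Is16B(), but not Is1Q().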
+  bool IsB() const { return IsVRegister() && Is8Bits(); }
+  bool IsH() const { return IsVRegister() && Is16Bits(); }
+  bool IsS() const { return IsVRegister() && Is32Bits(); }
+  bool IsD() const { return IsVRegister() && Is64Bits(); }
+  bool IsQ() const { return IsVRegister() && Is128Bits(); }
+
+  // As above, but also check that the register has exactly one lane. For
+  // example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does
+  // not.
+  bool Is1B() const { return IsB() && IsScalar(); }
+  bool Is1H() const { return IsH() && IsScalar(); }
+  bool Is1S() const { return IsS() && IsScalar(); }
+  bool Is1D() const { return IsD() && IsScalar(); }
+  bool Is1Q() const { return IsQ() && IsScalar(); }
+
+  // Check the specific NEON format.
+  bool Is8B() const { return IsD() && IsLaneSizeB(); }
+  bool Is16B() const { return IsQ() && IsLaneSizeB(); }
+  bool Is2H() const { return IsS() && IsLaneSizeH(); }
+  bool Is4H() const { return IsD() && IsLaneSizeH(); }
+  bool Is8H() const { return IsQ() && IsLaneSizeH(); }
+  bool Is2S() const { return IsD() && IsLaneSizeS(); }
+  bool Is4S() const { return IsQ() && IsLaneSizeS(); }
+  bool Is2D() const { return IsQ() && IsLaneSizeD(); }
+
+  // A semantic alias for sdot and udot (indexed and by element) instructions.
+  // The current CPURegister implementation cannot tell this from Is1S(),
+  // but it might do later.
+  // TODO: Do this with the qualifiers_ field.
+  bool Is1S4B() const { return Is1S(); }
+
+  // Utilities for SVE registers.
+
+  bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; }
+  bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); }
+  bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); }
+
+  // SVE types have unknown sizes, but within known bounds.
+
+  int GetMaxSizeInBytes() const {
+    switch (GetType()) {
+      case kZRegister:
+        return kZRegMaxSizeInBytes;
+      case kPRegister:
+        return kPRegMaxSizeInBytes;
+      default:
+        VIXL_ASSERT(HasSize());
+        return GetSizeInBytes();
+    }
+  }
+
+  int GetMinSizeInBytes() const {
+    switch (GetType()) {
+      case kZRegister:
+        return kZRegMinSizeInBytes;
+      case kPRegister:
+        return kPRegMinSizeInBytes;
+      default:
+        VIXL_ASSERT(HasSize());
+        return GetSizeInBytes();
+    }
+  }
+
+  int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; }
+  int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; }
+
+  static RegisterBank GetBankFor(RegisterType type) {
+    switch (type) {
+      case kNoRegister:
+        return kNoRegisterBank;
+      case kRegister:
+        return kRRegisterBank;
+      case kVRegister:
+      case kZRegister:
+        return kVRegisterBank;
+      case kPRegister:
+        return kPRegisterBank;
+    }
+    VIXL_UNREACHABLE();
+    return kNoRegisterBank;
+  }
+
+  static unsigned GetMaxCodeFor(CPURegister::RegisterType type) {
+    return GetMaxCodeFor(GetBankFor(type));
+  }
+
+ protected:
+  enum EncodedSize : uint8_t {
+    // Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero.
+    kEncodedUnknownSize = 0,
+
+    // The implementation assumes that the remaining sizes are encoded as
+    // `log2(size) + c`, so the following names must remain in sequence.
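+    // For instance, kEncodedBRegSize (1) decodes to a 1-byte (8-bit) register
+    // and kEncodedQRegSize (5) to a 16-byte (128-bit) one; see
+    // DecodeSizeInBytesLog2() below.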
+ kEncodedBRegSize, + kEncodedHRegSize, + kEncodedSRegSize, + kEncodedDRegSize, + kEncodedQRegSize, + + kEncodedWRegSize = kEncodedSRegSize, + kEncodedXRegSize = kEncodedDRegSize + }; + VIXL_STATIC_ASSERT(kSRegSize == kWRegSize); + VIXL_STATIC_ASSERT(kDRegSize == kXRegSize); + + char GetLaneSizeSymbol() const { + switch (lane_size_) { + case kEncodedBRegSize: + return 'B'; + case kEncodedHRegSize: + return 'H'; + case kEncodedSRegSize: + return 'S'; + case kEncodedDRegSize: + return 'D'; + case kEncodedQRegSize: + return 'Q'; + case kEncodedUnknownSize: + break; + } + VIXL_UNREACHABLE(); + return '?'; + } + + static EncodedSize EncodeSizeInBits(int size_in_bits) { + switch (size_in_bits) { + case kUnknownSize: + return kEncodedUnknownSize; + case kBRegSize: + return kEncodedBRegSize; + case kHRegSize: + return kEncodedHRegSize; + case kSRegSize: + return kEncodedSRegSize; + case kDRegSize: + return kEncodedDRegSize; + case kQRegSize: + return kEncodedQRegSize; + } + VIXL_UNREACHABLE(); + return kEncodedUnknownSize; + } + + static int DecodeSizeInBytesLog2(EncodedSize encoded_size) { + switch (encoded_size) { + case kEncodedUnknownSize: + // Log2 of B-sized lane in bytes is 0, so we can't just return 0 here. + VIXL_UNREACHABLE(); + return -1; + case kEncodedBRegSize: + return kBRegSizeInBytesLog2; + case kEncodedHRegSize: + return kHRegSizeInBytesLog2; + case kEncodedSRegSize: + return kSRegSizeInBytesLog2; + case kEncodedDRegSize: + return kDRegSizeInBytesLog2; + case kEncodedQRegSize: + return kQRegSizeInBytesLog2; + } + VIXL_UNREACHABLE(); + return kUnknownSize; + } + + static int DecodeSizeInBytes(EncodedSize encoded_size) { + if (encoded_size == kEncodedUnknownSize) { + return kUnknownSize; + } + return 1 << DecodeSizeInBytesLog2(encoded_size); + } + + static int DecodeSizeInBits(EncodedSize encoded_size) { + VIXL_STATIC_ASSERT(kUnknownSize == 0); + return DecodeSizeInBytes(encoded_size) * kBitsPerByte; + } + + static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank); + + enum Qualifiers : uint8_t { + kNoQualifiers = 0, + // Used by P registers. + kMerging, + kZeroing + }; + + // An unchecked constructor, for use by derived classes. + CPURegister(int code, + EncodedSize size, + RegisterBank bank, + EncodedSize lane_size, + Qualifiers qualifiers = kNoQualifiers) + : code_(code), + bank_(bank), + size_(size), + qualifiers_(qualifiers), + lane_size_(lane_size) {} + + // TODO: Check that access to these fields is reasonably efficient. + uint8_t code_; + RegisterBank bank_; + EncodedSize size_; + Qualifiers qualifiers_; + EncodedSize lane_size_; +}; +// Ensure that CPURegisters can fit in a single (64-bit) register. This is a +// proxy for being "cheap to pass by value", which is hard to check directly. +VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t)); + +// TODO: Add constexpr constructors. +#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \ + VIXL_CONSTEXPR NAME() : PARENT_TYPE() {} \ + \ + explicit NAME(CPURegister other) : PARENT_TYPE(other) { \ + VIXL_ASSERT(IsValid()); \ + } \ + \ + VIXL_CONSTEXPR static unsigned GetMaxCode() { \ + return kNumberOf##REGISTER_TYPE##s - 1; \ + } + +// Any W or X register, including the zero register and the stack pointer. 
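+//
+// For example (the named constants defined later in this header are the usual
+// way to write these):
+//
+//   Register(0, kWRegSize);                   // w0
+//   Register(30, kXRegSize);                  // x30 (lr)
+//   Register(kSPRegInternalCode, kXRegSize);  // sp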
+class Register : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister) + + Register(int code, int size_in_bits) + : CPURegister(code, size_in_bits, kRegister) { + VIXL_ASSERT(IsValidRegister()); + } + + bool IsValid() const { return IsValidRegister(); } +}; + +// Any FP or NEON V register, including vector (V.<T>) and scalar forms +// (B, H, S, D, Q). +class VRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister) + + // For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0). + explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1) + : CPURegister(code, + EncodeSizeInBits(size_in_bits), + kVRegisterBank, + EncodeLaneSizeInBits(size_in_bits, lanes)) { + VIXL_ASSERT(IsValidVRegister()); + } + + VRegister(int code, VectorFormat format) + : CPURegister(code, + EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)), + kVRegisterBank, + EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), + kNoQualifiers) { + VIXL_ASSERT(IsValid()); + } + + VRegister V8B() const; + VRegister V16B() const; + VRegister V2H() const; + VRegister V4H() const; + VRegister V8H() const; + VRegister V2S() const; + VRegister V4S() const; + VRegister V1D() const; + VRegister V2D() const; + VRegister S4B() const; + + bool IsValid() const { return IsValidVRegister(); } + + protected: + static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) { + VIXL_ASSERT(lanes >= 1); + VIXL_ASSERT((size_in_bits % lanes) == 0); + return EncodeSizeInBits(size_in_bits / lanes); + } +}; + +// Any SVE Z register, with or without a lane size specifier. +class ZRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister) + + explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize) + : CPURegister(code, + kEncodedUnknownSize, + kVRegisterBank, + EncodeSizeInBits(lane_size_in_bits)) { + VIXL_ASSERT(IsValid()); + } + + ZRegister(int code, VectorFormat format) + : CPURegister(code, + kEncodedUnknownSize, + kVRegisterBank, + EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), + kNoQualifiers) { + VIXL_ASSERT(IsValid()); + } + + // Return a Z register with a known lane size (like "z0.B"). + ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); } + ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); } + ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); } + ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); } + ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); } + + template <typename T> + ZRegister WithLaneSize(T format) const { + return ZRegister(GetCode(), format); + } + + ZRegister WithSameLaneSizeAs(const CPURegister& other) const { + VIXL_ASSERT(other.HasLaneSize()); + return this->WithLaneSize(other.GetLaneSizeInBits()); + } + + bool IsValid() const { return IsValidZRegister(); } +}; + +// Any SVE P register, with or without a qualifier or lane size specifier. +class PRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister) + + explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsUnqualified(); + } + + // Return a P register with a known lane size (like "p0.B"). 
+ PRegisterWithLaneSize VnB() const; + PRegisterWithLaneSize VnH() const; + PRegisterWithLaneSize VnS() const; + PRegisterWithLaneSize VnD() const; + + template <typename T> + PRegisterWithLaneSize WithLaneSize(T format) const; + + PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const; + + // SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or + // "/m" (merging) suffix. These methods are VIXL's equivalents. + PRegisterZ Zeroing() const; + PRegisterM Merging() const; + + protected: + // Unchecked constructors, for use by derived classes. + PRegister(int code, EncodedSize encoded_lane_size) + : CPURegister(code, + kEncodedUnknownSize, + kPRegisterBank, + encoded_lane_size, + kNoQualifiers) {} + + PRegister(int code, Qualifiers qualifiers) + : CPURegister(code, + kEncodedUnknownSize, + kPRegisterBank, + kEncodedUnknownSize, + qualifiers) {} +}; + +// Any SVE P register with a known lane size (like "p0.B"). +class PRegisterWithLaneSize : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister) + + PRegisterWithLaneSize(int code, int lane_size_in_bits) + : PRegister(code, EncodeSizeInBits(lane_size_in_bits)) { + VIXL_ASSERT(IsValid()); + } + + PRegisterWithLaneSize(int code, VectorFormat format) + : PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && HasLaneSize() && IsUnqualified(); + } + + // Overload lane size accessors so we can assert `HasLaneSize()`. This allows + // tools such as clang-tidy to prove that the result of GetLaneSize* is + // non-zero. + + // TODO: Make these return 'int'. + unsigned GetLaneSizeInBits() const { + VIXL_ASSERT(HasLaneSize()); + return PRegister::GetLaneSizeInBits(); + } + + unsigned GetLaneSizeInBytes() const { + VIXL_ASSERT(HasLaneSize()); + return PRegister::GetLaneSizeInBytes(); + } +}; + +// Any SVE P register with the zeroing qualifier (like "p0/z"). +class PRegisterZ : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister) + + explicit PRegisterZ(int code) : PRegister(code, kZeroing) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsZeroing(); + } +}; + +// Any SVE P register with the merging qualifier (like "p0/m"). 
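+// A PRegisterM is normally obtained via PRegister::Merging(). For example (a
+// sketch, assuming the usual predicated MacroAssembler helpers declared
+// elsewhere in VIXL):
+//
+//   __ Add(z0.VnS(), p1.Merging(), z0.VnS(), z2.VnS());  // add z0.s, p1/m, ...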
+class PRegisterM : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister) + + explicit PRegisterM(int code) : PRegister(code, kMerging) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsMerging(); + } +}; + +inline PRegisterWithLaneSize PRegister::VnB() const { + return PRegisterWithLaneSize(GetCode(), kBRegSize); +} +inline PRegisterWithLaneSize PRegister::VnH() const { + return PRegisterWithLaneSize(GetCode(), kHRegSize); +} +inline PRegisterWithLaneSize PRegister::VnS() const { + return PRegisterWithLaneSize(GetCode(), kSRegSize); +} +inline PRegisterWithLaneSize PRegister::VnD() const { + return PRegisterWithLaneSize(GetCode(), kDRegSize); +} + +template <typename T> +inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const { + return PRegisterWithLaneSize(GetCode(), format); +} + +inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs( + const CPURegister& other) const { + VIXL_ASSERT(other.HasLaneSize()); + return this->WithLaneSize(other.GetLaneSizeInBits()); +} + +inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); } +inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); } + +#define VIXL_REGISTER_WITH_SIZE_LIST(V) \ + V(WRegister, kWRegSize, Register) \ + V(XRegister, kXRegSize, Register) \ + V(QRegister, kQRegSize, VRegister) \ + V(DRegister, kDRegSize, VRegister) \ + V(SRegister, kSRegSize, VRegister) \ + V(HRegister, kHRegSize, VRegister) \ + V(BRegister, kBRegSize, VRegister) + +#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT) \ + class NAME : public PARENT { \ + public: \ + VIXL_CONSTEXPR NAME() : PARENT() {} \ + explicit NAME(int code) : PARENT(code, SIZE) {} \ + \ + explicit NAME(PARENT other) : PARENT(other) { \ + VIXL_ASSERT(GetSizeInBits() == SIZE); \ + } \ + \ + PARENT As##PARENT() const { return *this; } \ + \ + VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; } \ + \ + bool IsValid() const { \ + return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \ + } \ + }; + +VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE) + +// No*Reg is used to provide default values for unused arguments, error cases +// and so on. Note that these (and the default constructors) all compare equal +// (using the Is() method). +const Register NoReg; +const VRegister NoVReg; +const CPURegister NoCPUReg; +const ZRegister NoZReg; + +// TODO: Ideally, these would use specialised register types (like XRegister and +// so on). However, doing so throws up template overloading problems elsewhere. +#define VIXL_DEFINE_REGISTERS(N) \ + const Register w##N = WRegister(N); \ + const Register x##N = XRegister(N); \ + const VRegister b##N = BRegister(N); \ + const VRegister h##N = HRegister(N); \ + const VRegister s##N = SRegister(N); \ + const VRegister d##N = DRegister(N); \ + const VRegister q##N = QRegister(N); \ + const VRegister v##N(N); \ + const ZRegister z##N(N); +AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS) +#undef VIXL_DEFINE_REGISTERS + +#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N); +AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS) +#undef VIXL_DEFINE_P_REGISTERS + +// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'. +const Register wsp = WRegister(kSPRegInternalCode); +const Register sp = XRegister(kSPRegInternalCode); + +// Standard aliases. 
+const Register ip0 = x16; +const Register ip1 = x17; +const Register lr = x30; +const Register xzr = x31; +const Register wzr = w31; + +// AreAliased returns true if any of the named registers overlap. Arguments +// set to NoReg are ignored. The system stack pointer may be specified. +bool AreAliased(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + +// AreSameSizeAndType returns true if all of the specified registers have the +// same size, and are of the same type. The system stack pointer may be +// specified. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreSameSizeAndType(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg, + const CPURegister& reg5 = NoCPUReg, + const CPURegister& reg6 = NoCPUReg, + const CPURegister& reg7 = NoCPUReg, + const CPURegister& reg8 = NoCPUReg); + +// AreEven returns true if all of the specified registers have even register +// indices. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreEven(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + +// AreConsecutive returns true if all of the specified registers are +// consecutive in the register file. Arguments set to NoReg are ignored, as are +// any subsequent arguments. At least one argument (reg1) must be valid +// (not NoCPUReg). +bool AreConsecutive(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + +// AreSameFormat returns true if all of the specified registers have the same +// vector format. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoVReg). +bool AreSameFormat(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + +// AreSameLaneSize returns true if all of the specified registers have the same +// element lane size, B, H, S or D. It doesn't compare the type of registers. +// Arguments set to NoReg are ignored, as are any subsequent arguments. +// At least one argument (reg1) must be valid (not NoVReg). +// TODO: Remove this, and replace its uses with AreSameFormat. 
+bool AreSameLaneSize(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); +} +} // namespace vixl::aarch64 + +#endif // VIXL_AARCH64_REGISTERS_AARCH64_H_ diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index 855a2971..6d6d1677 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -26,6 +26,9 @@ #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 +#include <errno.h> +#include <unistd.h> + #include <cmath> #include <cstring> #include <limits> @@ -65,12 +68,13 @@ SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) { Simulator::Simulator(Decoder* decoder, FILE* stream) - : cpu_features_auditor_(decoder, CPUFeatures::All()) { + : movprfx_(NULL), cpu_features_auditor_(decoder, CPUFeatures::All()) { // Ensure that shift operations act as the simulator expects. VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1); VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff); - instruction_stats_ = false; + // Set up a dummy pipe for CanReadMemory. + VIXL_CHECK(pipe(dummy_pipe_fd_) == 0); // Set up the decoder. decoder_ = decoder; @@ -91,6 +95,10 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) SetColouredTrace(false); trace_parameters_ = LOG_NONE; + // We have to configure the SVE vector register length before calling + // ResetState(). + SetVectorLengthInBits(kZRegMinSize); + ResetState(); // Allocate and set up the simulator stack. @@ -105,8 +113,6 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) tos = AlignDown(tos, 16); WriteSp(tos); - instrumentation_ = NULL; - // Print a warning about exclusive-access instructions, but only the first // time they are encountered. This warning can be silenced using // SilenceExclusiveAccessWarning(). @@ -116,52 +122,111 @@ Simulator::Simulator(Decoder* decoder, FILE* stream) // Initialize the common state of RNDR and RNDRRS. uint16_t seed[3] = {11, 22, 33}; - VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rndr_state_)); - memcpy(rndr_state_, seed, sizeof(rndr_state_)); -} + VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_)); + memcpy(rand_state_, seed, sizeof(rand_state_)); + // Initialize all bits of pseudo predicate register to true. + LogicPRegister ones(pregister_all_true_); + ones.SetAllBits(); +} -void Simulator::ResetState() { +void Simulator::ResetSystemRegisters() { // Reset the system registers. nzcv_ = SimSystemRegister::DefaultValueFor(NZCV); fpcr_ = SimSystemRegister::DefaultValueFor(FPCR); + ResetFFR(); +} - // Reset registers to 0. - pc_ = NULL; - pc_modified_ = false; +void Simulator::ResetRegisters() { for (unsigned i = 0; i < kNumberOfRegisters; i++) { WriteXRegister(i, 0xbadbeef); } - // Set FP registers to a value that is a NaN in both 32-bit and 64-bit FP. - uint64_t nan_bits[] = { - UINT64_C(0x7ff00cab7f8ba9e1), UINT64_C(0x7ff0dead7f8beef1), - }; - VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits[0] & kDRegMask))); - VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits[0] & kSRegMask))); + // Returning to address 0 exits the Simulator. + WriteLr(kEndOfSimAddress); +} - qreg_t q_bits; - VIXL_ASSERT(sizeof(q_bits) == sizeof(nan_bits)); - memcpy(&q_bits, nan_bits, sizeof(nan_bits)); +void Simulator::ResetVRegisters() { + // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP. 
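+  // The vector length is always a whole number of D-sized (64-bit) chunks, so
+  // initialise each register one D-sized lane at a time.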
+ VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0); + int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes; + for (unsigned i = 0; i < kNumberOfZRegisters; i++) { + VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes()); + vregisters_[i].NotifyAccessAsZ(); + for (int lane = 0; lane < lane_count; lane++) { + // Encode the register number and (D-sized) lane into each NaN, to + // make them easier to trace. + uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) | + (0x0000000000000001 * lane); + VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask))); + VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask))); + vregisters_[i].Insert(lane, nan_bits); + } + } +} - for (unsigned i = 0; i < kNumberOfVRegisters; i++) { - WriteQRegister(i, q_bits); +void Simulator::ResetPRegisters() { + VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0); + int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes; + // Ensure the register configuration fits in this bit encoding. + VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX); + VIXL_ASSERT(lane_count <= UINT8_MAX); + for (unsigned i = 0; i < kNumberOfPRegisters; i++) { + VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes()); + for (int lane = 0; lane < lane_count; lane++) { + // Encode the register number and (H-sized) lane into each lane slot. + uint16_t bits = (0x0100 * lane) | i; + pregisters_[i].Insert(lane, bits); + } } - // Returning to address 0 exits the Simulator. - WriteLr(kEndOfSimAddress); +} + +void Simulator::ResetFFR() { + VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0); + int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes; + ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes))); +} + +void Simulator::ResetState() { + ResetSystemRegisters(); + ResetRegisters(); + ResetVRegisters(); + ResetPRegisters(); + pc_ = NULL; + pc_modified_ = false; + + // BTI state. btype_ = DefaultBType; next_btype_ = DefaultBType; } +void Simulator::SetVectorLengthInBits(unsigned vector_length) { + VIXL_ASSERT((vector_length >= kZRegMinSize) && + (vector_length <= kZRegMaxSize)); + VIXL_ASSERT((vector_length % kZRegMinSize) == 0); + vector_length_ = vector_length; + + for (unsigned i = 0; i < kNumberOfZRegisters; i++) { + vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes()); + } + for (unsigned i = 0; i < kNumberOfPRegisters; i++) { + pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes()); + } + + ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes()); + + ResetVRegisters(); + ResetPRegisters(); + ResetFFR(); +} Simulator::~Simulator() { delete[] stack_; // The decoder may outlive the simulator. 
decoder_->RemoveVisitor(print_disasm_); delete print_disasm_; - - decoder_->RemoveVisitor(instrumentation_); - delete instrumentation_; + close(dummy_pipe_fd_[0]); + close(dummy_pipe_fd_[1]); } @@ -182,6 +247,7 @@ void Simulator::RunFrom(const Instruction* first) { } +// clang-format off const char* Simulator::xreg_names[] = {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", @@ -196,6 +262,13 @@ const char* Simulator::wreg_names[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"}; +const char* Simulator::breg_names[] = {"b0", "b1", "b2", "b3", "b4", "b5", + "b6", "b7", "b8", "b9", "b10", "b11", + "b12", "b13", "b14", "b15", "b16", "b17", + "b18", "b19", "b20", "b21", "b22", "b23", + "b24", "b25", "b26", "b27", "b28", "b29", + "b30", "b31"}; + const char* Simulator::hreg_names[] = {"h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", "h16", "h17", @@ -224,27 +297,47 @@ const char* Simulator::vreg_names[] = {"v0", "v1", "v2", "v3", "v4", "v5", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"}; +const char* Simulator::zreg_names[] = {"z0", "z1", "z2", "z3", "z4", "z5", + "z6", "z7", "z8", "z9", "z10", "z11", + "z12", "z13", "z14", "z15", "z16", "z17", + "z18", "z19", "z20", "z21", "z22", "z23", + "z24", "z25", "z26", "z27", "z28", "z29", + "z30", "z31"}; + +const char* Simulator::preg_names[] = {"p0", "p1", "p2", "p3", "p4", "p5", + "p6", "p7", "p8", "p9", "p10", "p11", + "p12", "p13", "p14", "p15"}; +// clang-format on + const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) { - VIXL_ASSERT(code < kNumberOfRegisters); // If the code represents the stack pointer, index the name after zr. - if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { + if ((code == kSPRegInternalCode) || + ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) { code = kZeroRegCode + 1; } + VIXL_ASSERT(code < ArrayLength(wreg_names)); return wreg_names[code]; } const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) { - VIXL_ASSERT(code < kNumberOfRegisters); // If the code represents the stack pointer, index the name after zr. 
- if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) { + if ((code == kSPRegInternalCode) || + ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) { code = kZeroRegCode + 1; } + VIXL_ASSERT(code < ArrayLength(xreg_names)); return xreg_names[code]; } +const char* Simulator::BRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfVRegisters); + return breg_names[code]; +} + + const char* Simulator::HRegNameForCode(unsigned code) { VIXL_ASSERT(code < kNumberOfVRegisters); return hreg_names[code]; @@ -269,6 +362,39 @@ const char* Simulator::VRegNameForCode(unsigned code) { } +const char* Simulator::ZRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfZRegisters); + return zreg_names[code]; +} + + +const char* Simulator::PRegNameForCode(unsigned code) { + VIXL_ASSERT(code < kNumberOfPRegisters); + return preg_names[code]; +} + +SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) { + SimVRegister ones, result; + dup_immediate(kFormatVnB, ones, 0xff); + mov_zeroing(kFormatVnB, result, pg, ones); + return result; +} + +void Simulator::ExtractFromSimVRegister(VectorFormat vform, + SimPRegister& pd, + SimVRegister vreg) { + SimVRegister zero; + dup_immediate(kFormatVnB, zero, 0); + SVEIntCompareVectorsHelper(ne, + vform, + pd, + GetPTrue(), + vreg, + zero, + false, + LeaveFlags); +} + #define COLOUR(colour_code) "\033[0;" colour_code "m" #define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m" #define COLOUR_HIGHLIGHT "\033[43m" @@ -291,6 +417,8 @@ void Simulator::SetColouredTrace(bool value) { clr_reg_value = value ? COLOUR(CYAN) : ""; clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : ""; clr_vreg_value = value ? COLOUR(MAGENTA) : ""; + clr_preg_name = value ? COLOUR_BOLD(GREEN) : ""; + clr_preg_value = value ? COLOUR(GREEN) : ""; clr_memory_address = value ? COLOUR_BOLD(BLUE) : ""; clr_warning = value ? COLOUR_BOLD(YELLOW) : ""; clr_warning_message = value ? COLOUR(YELLOW) : ""; @@ -322,22 +450,6 @@ void Simulator::SetTraceParameters(int parameters) { } -void Simulator::SetInstructionStats(bool value) { - if (value != instruction_stats_) { - if (value) { - if (instrumentation_ == NULL) { - // Set the sample period to 10, as the VIXL examples and tests are - // short. - instrumentation_ = new Instrument("vixl_stats.csv", 10); - } - decoder_->AppendVisitor(instrumentation_); - } else if (instrumentation_ != NULL) { - decoder_->RemoveVisitor(instrumentation_); - } - instruction_stats_ = value; - } -} - // Helpers --------------------------------------------------------------------- uint64_t Simulator::AddWithCarry(unsigned reg_size, bool set_flags, @@ -379,44 +491,50 @@ uint64_t Simulator::AddWithCarry(unsigned reg_size, int64_t Simulator::ShiftOperand(unsigned reg_size, - int64_t value, + uint64_t uvalue, Shift shift_type, unsigned amount) const { - VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); - if (amount == 0) { - return value; - } - uint64_t uvalue = static_cast<uint64_t>(value); - uint64_t mask = kWRegMask; - bool is_negative = (uvalue & kWSignMask) != 0; - if (reg_size == kXRegSize) { - mask = kXRegMask; - is_negative = (uvalue & kXSignMask) != 0; - } - - switch (shift_type) { - case LSL: - uvalue <<= amount; - break; - case LSR: - uvalue >>= amount; - break; - case ASR: - uvalue >>= amount; - if (is_negative) { - // Simulate sign-extension to 64 bits. 
- uvalue |= ~UINT64_C(0) << (reg_size - amount);
+ VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
+ (reg_size == kSRegSize) || (reg_size == kDRegSize));
+ if (amount > 0) {
+ uint64_t mask = GetUintMask(reg_size);
+ bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
+ // The behavior is undefined in C++ if the shift amount is greater than
+ // or equal to the register lane size. Work out the shifted result based
+ // on architectural behavior before performing the C++ shift operations.
+ switch (shift_type) {
+ case LSL:
+ if (amount >= reg_size) {
+ return UINT64_C(0);
+ }
+ uvalue <<= amount;
+ break;
+ case LSR:
+ if (amount >= reg_size) {
+ return UINT64_C(0);
+ }
+ uvalue >>= amount;
+ break;
+ case ASR:
+ if (amount >= reg_size) {
+ return is_negative ? ~UINT64_C(0) : UINT64_C(0);
+ }
+ uvalue >>= amount;
+ if (is_negative) {
+ // Simulate sign-extension to 64 bits.
+ uvalue |= ~UINT64_C(0) << (reg_size - amount);
+ }
+ break;
+ case ROR: {
+ uvalue = RotateRight(uvalue, amount, reg_size);
+ break;
}
- break;
- case ROR: {
- uvalue = RotateRight(uvalue, amount, reg_size);
- break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ return 0;
}
- default:
- VIXL_UNIMPLEMENTED();
- return 0;
+ uvalue &= mask;
}
- uvalue &= mask;
int64_t result;
memcpy(&result, &uvalue, sizeof(result));
@@ -592,6 +710,15 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
return kPrintReg1S;
case kFormatD:
return kPrintReg1D;
+
+ case kFormatVnB:
+ return kPrintRegVnB;
+ case kFormatVnH:
+ return kPrintRegVnH;
+ case kFormatVnS:
+ return kPrintRegVnS;
+ case kFormatVnD:
+ return kPrintRegVnD;
}
}
@@ -623,301 +750,445 @@ Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
}
}
-
-void Simulator::PrintWrittenRegisters() {
+void Simulator::PrintRegisters() {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
- if (registers_[i].WrittenSinceLastLog()) PrintRegister(i);
+ if (i == kSpRegCode) i = kSPRegInternalCode;
+ PrintRegister(i);
}
}
-
-void Simulator::PrintWrittenVRegisters() {
+void Simulator::PrintVRegisters() {
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
- // At this point there is no type information, so print as a raw 1Q.
- if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q);
+ PrintVRegister(i);
}
}
-
-void Simulator::PrintSystemRegisters() {
- PrintSystemRegister(NZCV);
- PrintSystemRegister(FPCR);
+void Simulator::PrintZRegisters() {
+ for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
+ PrintZRegister(i);
+ }
}
-
-void Simulator::PrintRegisters() {
+void Simulator::PrintWrittenRegisters() {
for (unsigned i = 0; i < kNumberOfRegisters; i++) {
- PrintRegister(i);
+ if (registers_[i].WrittenSinceLastLog()) {
+ if (i == kSpRegCode) i = kSPRegInternalCode;
+ PrintRegister(i);
+ }
}
}
-
-void Simulator::PrintVRegisters() {
+void Simulator::PrintWrittenVRegisters() {
+ bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
- // At this point there is no type information, so print as a raw 1Q.
- PrintVRegister(i, kPrintReg1Q);
+ if (vregisters_[i].WrittenSinceLastLog()) {
+ // Z registers are initialised in the constructor before the user can
+ // configure the CPU features, so we must also check for SVE here.
+ if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
+ PrintZRegister(i);
+ } else {
+ PrintVRegister(i);
+ }
+ }
}
}
-
-// Print a register's name and raw value.
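// A minimal sketch of the hazard that the guards in ShiftOperand (above)
// avoid; the variable names here are illustrative. AArch64 defines LSL #64
// on an X-sized lane to produce zero, but the equivalent C++ shift is
// undefined behaviour, so the result must be computed without evaluating
// the shift operator:
//
//   uint64_t v = 0x1234;
//   unsigned amount = 64;
//   // v <<= amount;  // Undefined: shift count >= width of uint64_t.
//   v = (amount >= 64) ? UINT64_C(0) : (v << amount);  // Architecturally 0.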
-// -// Only the least-significant `size_in_bytes` bytes of the register are printed, -// but the value is aligned as if the whole register had been printed. -// -// For typical register updates, size_in_bytes should be set to kXRegSizeInBytes -// -- the default -- so that the whole register is printed. Other values of -// size_in_bytes are intended for use when the register hasn't actually been -// updated (such as in PrintWrite). -// -// No newline is printed. This allows the caller to print more details (such as -// a memory access annotation). -void Simulator::PrintRegisterRawHelper(unsigned code, - Reg31Mode r31mode, - int size_in_bytes) { - // The template for all supported sizes. - // "# x{code}: 0xffeeddccbbaa9988" - // "# w{code}: 0xbbaa9988" - // "# w{code}<15:0>: 0x9988" - // "# w{code}<7:0>: 0x88" - unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2; - - const char* name = ""; - const char* suffix = ""; - switch (size_in_bytes) { - case kXRegSizeInBytes: - name = XRegNameForCode(code, r31mode); - break; - case kWRegSizeInBytes: - name = WRegNameForCode(code, r31mode); - break; - case 2: - name = WRegNameForCode(code, r31mode); - suffix = "<15:0>"; - padding_chars -= strlen(suffix); - break; - case 1: - name = WRegNameForCode(code, r31mode); - suffix = "<7:0>"; - padding_chars -= strlen(suffix); - break; - default: - VIXL_UNREACHABLE(); - } - fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix); - - // Print leading padding spaces. - VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2)); - for (unsigned i = 0; i < padding_chars; i++) { - putc(' ', stream_); +void Simulator::PrintWrittenPRegisters() { + // P registers are initialised in the constructor before the user can + // configure the CPU features, so we must check for SVE here. + if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return; + for (unsigned i = 0; i < kNumberOfPRegisters; i++) { + if (pregisters_[i].WrittenSinceLastLog()) { + PrintPRegister(i); + } } - - // Print the specified bits in hexadecimal format. - uint64_t bits = ReadRegister<uint64_t>(code, r31mode); - bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8); - VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes); - - int chars = size_in_bytes * 2; - fprintf(stream_, - "%s0x%0*" PRIx64 "%s", - clr_reg_value, - chars, - bits, - clr_normal); + if (ReadFFR().WrittenSinceLastLog()) PrintFFR(); } - -void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) { - registers_[code].NotifyRegisterLogged(); - - // Don't print writes into xzr. - if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) { - return; - } - - // The template for all x and w registers: - // "# x{code}: 0x{value}" - // "# w{code}: 0x{value}" - - PrintRegisterRawHelper(code, r31mode); - fprintf(stream_, "\n"); +void Simulator::PrintSystemRegisters() { + PrintSystemRegister(NZCV); + PrintSystemRegister(FPCR); } - -// Print a register's name and raw value. -// -// The `bytes` and `lsb` arguments can be used to limit the bytes that are -// printed. These arguments are intended for use in cases where register hasn't -// actually been updated (such as in PrintVWrite). -// -// No newline is printed. This allows the caller to print more details (such as -// a floating-point interpretation or a memory access annotation). -void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) { - // The template for vector types: - // "# v{code}: 0xffeeddccbbaa99887766554433221100". - // An example with bytes=4 and lsb=8: - // "# v{code}: 0xbbaa9988 ". 
- fprintf(stream_, - "# %s%5s: %s", - clr_vreg_name, - VRegNameForCode(code), - clr_vreg_value); - - int msb = lsb + bytes - 1; - int byte = kQRegSizeInBytes - 1; - - // Print leading padding spaces. (Two spaces per byte.) - while (byte > msb) { +void Simulator::PrintRegisterValue(const uint8_t* value, + int value_size, + PrintRegisterFormat format) { + int print_width = GetPrintRegSizeInBytes(format); + VIXL_ASSERT(print_width <= value_size); + for (int i = value_size - 1; i >= print_width; i--) { + // Pad with spaces so that values align vertically. fprintf(stream_, " "); - byte--; + // If we aren't explicitly printing a partial value, ensure that the + // unprinted bits are zero. + VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0)); } - - // Print the specified part of the value, byte by byte. - qreg_t rawbits = ReadQRegister(code); fprintf(stream_, "0x"); - while (byte >= lsb) { - fprintf(stream_, "%02x", rawbits.val[byte]); - byte--; + for (int i = print_width - 1; i >= 0; i--) { + fprintf(stream_, "%02x", value[i]); } +} - // Print trailing padding spaces. - while (byte >= 0) { - fprintf(stream_, " "); - byte--; +void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value, + uint16_t lane_mask, + PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegAsFP) != 0); + int lane_size = GetPrintRegLaneSizeInBytes(format); + fprintf(stream_, " ("); + bool last_inactive = false; + const char* sep = ""; + for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") { + bool access = (lane_mask & (1 << (i * lane_size))) != 0; + if (access) { + // Read the lane as a double, so we can format all FP types in the same + // way. We squash NaNs, and a double can exactly represent any other value + // that the smaller types can represent, so this is lossless. + double element; + switch (lane_size) { + case kHRegSizeInBytes: { + Float16 element_fp16; + VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes); + memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16)); + element = FPToDouble(element_fp16, kUseDefaultNaN); + break; + } + case kSRegSizeInBytes: { + float element_fp32; + memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32)); + element = static_cast<double>(element_fp32); + break; + } + case kDRegSizeInBytes: { + memcpy(&element, &value[i * lane_size], sizeof(element)); + break; + } + default: + VIXL_UNREACHABLE(); + fprintf(stream_, "{UnknownFPValue}"); + continue; + } + if (IsNaN(element)) { + // The fprintf behaviour for NaNs is implementation-defined. Always + // print "nan", so that traces are consistent. + fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal); + } else { + fprintf(stream_, + "%s%s%#.4g%s", + sep, + clr_vreg_value, + element, + clr_normal); + } + last_inactive = false; + } else if (!last_inactive) { + // Replace each contiguous sequence of inactive lanes with "...". + fprintf(stream_, "%s...", sep); + last_inactive = true; + } } - fprintf(stream_, "%s", clr_normal); + fprintf(stream_, ")"); } +void Simulator::PrintRegister(int code, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) || + (static_cast<unsigned>(code) == kSPRegInternalCode)); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar); + VIXL_ASSERT((format & kPrintRegAsFP) == 0); -// Print each of the specified lanes of a register as a float or double value. 
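// (A concrete instance of the lossless widening used by
// PrintRegisterValueFPAnnotations above, with illustrative values: FP16 1.5
// is encoded as 0x3e00, and converting it to a double yields exactly 1.5,
// since double's 52-bit significand can represent every value of FP16's
// 10-bit significand without rounding.)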
-//
-// The `lane_count` and `lslane` arguments can be used to limit the lanes that
-// are printed. These arguments are intended for use in cases where register
-// hasn't actually been updated (such as in PrintVWrite).
-//
-// No newline is printed. This allows the caller to print more details (such as
-// a memory access annotation).
-void Simulator::PrintVRegisterFPHelper(unsigned code,
- unsigned lane_size_in_bytes,
- int lane_count,
- int rightmost_lane) {
- VIXL_ASSERT((lane_size_in_bytes == kHRegSizeInBytes) ||
- (lane_size_in_bytes == kSRegSizeInBytes) ||
- (lane_size_in_bytes == kDRegSizeInBytes));
-
- unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes);
- VIXL_ASSERT(msb <= kQRegSizeInBytes);
-
- // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register
- // name is used:
- // " (h{code}: {value})"
- // " (s{code}: {value})"
- // " (d{code}: {value})"
- // For vector types, "..." is used to represent one or more omitted lanes.
- // " (..., {value}, {value}, ...)"
- if (lane_size_in_bytes == kHRegSizeInBytes) {
- // TODO: Trace tests will fail until we regenerate them.
- return;
- }
- if ((lane_count == 1) && (rightmost_lane == 0)) {
- const char* name;
- switch (lane_size_in_bytes) {
- case kHRegSizeInBytes:
- name = HRegNameForCode(code);
+ SimRegister* reg;
+ SimRegister zero;
+ if (code == kZeroRegCode) {
+ reg = &zero;
+ } else {
+ // registers_[31] holds the SP.
+ VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
+ reg = &registers_[code % kNumberOfRegisters];
+ }
+
+ // We trace register writes as whole register values, implying that any
+ // unprinted bits are all zero:
+ // "# x{code}: 0x{-----value----}"
+ // "# w{code}: 0x{-value}"
+ // Stores trace partial register values, implying nothing about the unprinted
+ // bits:
+ // "# x{code}<63:0>: 0x{-----value----}"
+ // "# x{code}<31:0>: 0x{-value}"
+ // "# x{code}<15:0>: 0x{--}"
+ // "# x{code}<7:0>: 0x{}"
+
+ bool is_partial = (format & kPrintRegPartial) != 0;
+ unsigned print_reg_size = GetPrintRegSizeInBits(format);
+ std::stringstream name;
+ if (is_partial) {
+ name << XRegNameForCode(code) << GetPartialRegSuffix(format);
+ } else {
+ // Notify the register that it has been logged, but only if we're printing
+ // all of it.
+ reg->NotifyRegisterLogged();
+ switch (print_reg_size) {
+ case kWRegSize:
+ name << WRegNameForCode(code);
break;
- case kSRegSizeInBytes:
- name = SRegNameForCode(code);
- break;
- case kDRegSizeInBytes:
- name = DRegNameForCode(code);
+ case kXRegSize:
+ name << XRegNameForCode(code);
break;
default:
- name = NULL;
VIXL_UNREACHABLE();
- }
- fprintf(stream_, " (%s%s: ", clr_vreg_name, name);
- } else {
- if (msb < (kQRegSizeInBytes - 1)) {
- fprintf(stream_, " (..., ");
- } else {
- fprintf(stream_, " (");
+ return;
}
}
- // Print the list of values.
- const char* separator = ""; - int leftmost_lane = rightmost_lane + lane_count - 1; - for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) { - double value; - switch (lane_size_in_bytes) { - case kHRegSizeInBytes: - value = ReadVRegister(code).GetLane<uint16_t>(lane); + fprintf(stream_, + "# %s%*s: %s", + clr_reg_name, + kPrintRegisterNameFieldWidth, + name.str().c_str(), + clr_reg_value); + PrintRegisterValue(*reg, format); + fprintf(stream_, "%s%s", clr_normal, suffix); +} + +void Simulator::PrintVRegister(int code, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters); + VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) || + ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) || + ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector)); + + // We trace register writes as whole register values, implying that any + // unprinted bits are all zero: + // "# v{code}: 0x{-------------value------------}" + // "# d{code}: 0x{-----value----}" + // "# s{code}: 0x{-value}" + // "# h{code}: 0x{--}" + // "# b{code}: 0x{}" + // Stores trace partial register values, implying nothing about the unprinted + // bits: + // "# v{code}<127:0>: 0x{-------------value------------}" + // "# v{code}<63:0>: 0x{-----value----}" + // "# v{code}<31:0>: 0x{-value}" + // "# v{code}<15:0>: 0x{--}" + // "# v{code}<7:0>: 0x{}" + + bool is_partial = ((format & kPrintRegPartial) != 0); + std::stringstream name; + unsigned print_reg_size = GetPrintRegSizeInBits(format); + if (is_partial) { + name << VRegNameForCode(code) << GetPartialRegSuffix(format); + } else { + // Notify the register that it has been logged, but only if we're printing + // all of it. + vregisters_[code].NotifyRegisterLogged(); + switch (print_reg_size) { + case kBRegSize: + name << BRegNameForCode(code); break; - case kSRegSizeInBytes: - value = ReadVRegister(code).GetLane<float>(lane); + case kHRegSize: + name << HRegNameForCode(code); break; - case kDRegSizeInBytes: - value = ReadVRegister(code).GetLane<double>(lane); + case kSRegSize: + name << SRegNameForCode(code); + break; + case kDRegSize: + name << DRegNameForCode(code); + break; + case kQRegSize: + name << VRegNameForCode(code); break; default: - value = 0.0; VIXL_UNREACHABLE(); + return; } - if (IsNaN(value)) { - // The output for NaNs is implementation defined. Always print `nan`, so - // that traces are coherent across different implementations. - fprintf(stream_, "%s%snan%s", separator, clr_vreg_value, clr_normal); - } else { - fprintf(stream_, - "%s%s%#g%s", - separator, - clr_vreg_value, - value, - clr_normal); - } - separator = ", "; } - if (rightmost_lane > 0) { - fprintf(stream_, ", ..."); + fprintf(stream_, + "# %s%*s: %s", + clr_vreg_name, + kPrintRegisterNameFieldWidth, + name.str().c_str(), + clr_vreg_value); + PrintRegisterValue(vregisters_[code], format); + fprintf(stream_, "%s", clr_normal); + if ((format & kPrintRegAsFP) != 0) { + PrintRegisterValueFPAnnotations(vregisters_[code], format); + } + fprintf(stream_, "%s", suffix); +} + +void Simulator::PrintVRegistersForStructuredAccess(int rt_code, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format) { + bool print_fp = (format & kPrintRegAsFP) != 0; + // Suppress FP formatting, so we can specify the lanes we're interested in. 
+ PrintRegisterFormat format_no_fp = + static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP); + + for (int r = 0; r < reg_count; r++) { + int code = (rt_code + r) % kNumberOfVRegisters; + PrintVRegister(code, format_no_fp, ""); + if (print_fp) { + PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format); + } + fprintf(stream_, "\n"); } - fprintf(stream_, ")"); } +void Simulator::PrintZRegistersForStructuredAccess(int rt_code, + int q_index, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format) { + bool print_fp = (format & kPrintRegAsFP) != 0; + // Suppress FP formatting, so we can specify the lanes we're interested in. + PrintRegisterFormat format_no_fp = + static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP); + + PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format); + + const unsigned size = kQRegSizeInBytes; + unsigned byte_index = q_index * size; + const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index; + VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes()); + + for (int r = 0; r < reg_count; r++) { + int code = (rt_code + r) % kNumberOfZRegisters; + PrintPartialZRegister(code, q_index, format_no_fp, ""); + if (print_fp) { + PrintRegisterValueFPAnnotations(value, focus_mask, format_q); + } + fprintf(stream_, "\n"); + } +} -void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) { +void Simulator::PrintZRegister(int code, PrintRegisterFormat format) { + // We're going to print the register in parts, so force a partial format. + format = GetPrintRegPartial(format); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned i = 0; i < (vl / kQRegSize); i++) { + PrintPartialZRegister(code, i, format); + } vregisters_[code].NotifyRegisterLogged(); +} - int lane_size_log2 = format & kPrintRegLaneSizeMask; +void Simulator::PrintPRegister(int code, PrintRegisterFormat format) { + // We're going to print the register in parts, so force a partial format. + format = GetPrintRegPartial(format); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned i = 0; i < (vl / kQRegSize); i++) { + PrintPartialPRegister(code, i, format); + } + pregisters_[code].NotifyRegisterLogged(); +} - int reg_size_log2; - if (format & kPrintRegAsQVector) { - reg_size_log2 = kQRegSizeInBytesLog2; - } else if (format & kPrintRegAsDVector) { - reg_size_log2 = kDRegSizeInBytesLog2; - } else { - // Scalar types. - reg_size_log2 = lane_size_log2; +void Simulator::PrintFFR(PrintRegisterFormat format) { + // We're going to print the register in parts, so force a partial format. 
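// (With a 512-bit vector length, for example, that means four 16-bit
// predicate chunks, traced as FFR<15:0>, FFR<31:16>, FFR<47:32> and
// FFR<63:48>; an illustrative instance of the Q-sized chunking used above.)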
+ format = GetPrintRegPartial(format); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + SimPRegister& ffr = ReadFFR(); + for (unsigned i = 0; i < (vl / kQRegSize); i++) { + PrintPartialPRegister("FFR", ffr, i, format); } + ffr.NotifyRegisterLogged(); +} + +void Simulator::PrintPartialZRegister(int code, + int q_index, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters); + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + VIXL_ASSERT((format & kPrintRegPartial) != 0); + VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits()); - int lane_count = 1 << (reg_size_log2 - lane_size_log2); - int lane_size = 1 << lane_size_log2; + // We _only_ trace partial Z register values in Q-sized chunks, because + // they're often too large to reasonably fit on a single line. Each line + // implies nothing about the unprinted bits. + // "# z{code}<127:0>: 0x{-------------value------------}" - // The template for vector types: - // "# v{code}: 0x{rawbits} (..., {value}, ...)". - // The template for scalar types: - // "# v{code}: 0x{rawbits} ({reg}:{value})". - // The values in parentheses after the bit representations are floating-point - // interpretations. They are displayed only if the kPrintVRegAsFP bit is set. + format = GetPrintRegAsQChunkOfSVE(format); - PrintVRegisterRawHelper(code); - if (format & kPrintRegAsFP) { - PrintVRegisterFPHelper(code, lane_size, lane_count); + const unsigned size = kQRegSizeInBytes; + unsigned byte_index = q_index * size; + const uint8_t* value = vregisters_[code].GetBytes() + byte_index; + VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes()); + + int lsb = q_index * kQRegSize; + int msb = lsb + kQRegSize - 1; + std::stringstream name; + name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>'; + + fprintf(stream_, + "# %s%*s: %s", + clr_vreg_name, + kPrintRegisterNameFieldWidth, + name.str().c_str(), + clr_vreg_value); + PrintRegisterValue(value, size, format); + fprintf(stream_, "%s", clr_normal); + if ((format & kPrintRegAsFP) != 0) { + PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format); } + fprintf(stream_, "%s", suffix); +} + +void Simulator::PrintPartialPRegister(const char* name, + const SimPRegister& reg, + int q_index, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + VIXL_ASSERT((format & kPrintRegPartial) != 0); + VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits()); - fprintf(stream_, "\n"); + // We don't currently use the format for anything here. + USE(format); + + // We _only_ trace partial P register values, because they're often too large + // to reasonably fit on a single line. Each line implies nothing about the + // unprinted bits. + // + // We print values in binary, with spaces between each bit, in order for the + // bits to align with the Z register bytes that they predicate. 
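// (For example, with S-sized elements each predicate lane occupies four
// bits and only the lowest is significant, so an all-active 16-bit chunk
// of p1 would trace as "0b 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1"; illustrative.)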
+ // "# {name}<15:0>: 0b{-------------value------------}" + + int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit; + int lsb = q_index * print_size_in_bits; + int msb = lsb + print_size_in_bits - 1; + std::stringstream prefix; + prefix << name << '<' << msb << ':' << lsb << '>'; + + fprintf(stream_, + "# %s%*s: %s0b", + clr_preg_name, + kPrintRegisterNameFieldWidth, + prefix.str().c_str(), + clr_preg_value); + for (int i = msb; i >= lsb; i--) { + fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0'); + } + fprintf(stream_, "%s%s", clr_normal, suffix); } +void Simulator::PrintPartialPRegister(int code, + int q_index, + PrintRegisterFormat format, + const char* suffix) { + VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters); + PrintPartialPRegister(PRegNameForCode(code), + pregisters_[code], + q_index, + format, + suffix); +} void Simulator::PrintSystemRegister(SystemRegister id) { switch (id) { @@ -954,90 +1225,347 @@ void Simulator::PrintSystemRegister(SystemRegister id) { } } - -void Simulator::PrintRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - registers_[reg_code].NotifyRegisterLogged(); - - USE(format); - - // The template is "# {reg}: 0x{value} <- {address}". - PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister); +uint16_t Simulator::PrintPartialAccess(uint16_t access_mask, + uint16_t future_access_mask, + int struct_element_count, + int lane_size_in_bytes, + const char* op, + uintptr_t address, + int reg_size_in_bytes) { + // We want to assume that we'll access at least one lane. + VIXL_ASSERT(access_mask != 0); + VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) || + (reg_size_in_bytes == kQRegSizeInBytes)); + bool started_annotation = false; + // Indent to match the register field, the fixed formatting, and the value + // prefix ("0x"): "# {name}: 0x" + fprintf(stream_, "# %*s ", kPrintRegisterNameFieldWidth, ""); + // First, annotate the lanes (byte by byte). + for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) { + bool access = (access_mask & (1 << lane)) != 0; + bool future = (future_access_mask & (1 << lane)) != 0; + if (started_annotation) { + // If we've started an annotation, draw a horizontal line in addition to + // any other symbols. + if (access) { + fprintf(stream_, "─╨"); + } else if (future) { + fprintf(stream_, "─║"); + } else { + fprintf(stream_, "──"); + } + } else { + if (access) { + started_annotation = true; + fprintf(stream_, " ╙"); + } else if (future) { + fprintf(stream_, " ║"); + } else { + fprintf(stream_, " "); + } + } + } + VIXL_ASSERT(started_annotation); + fprintf(stream_, "─ 0x"); + int lane_size_in_nibbles = lane_size_in_bytes * 2; + // Print the most-significant struct element first. + const char* sep = ""; + for (int i = struct_element_count - 1; i >= 0; i--) { + int offset = lane_size_in_bytes * i; + uint64_t nibble = Memory::Read(lane_size_in_bytes, address + offset); + fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, nibble); + sep = "'"; + } fprintf(stream_, - " <- %s0x%016" PRIxPTR "%s\n", + " %s %s0x%016" PRIxPTR "%s\n", + op, clr_memory_address, address, clr_normal); + return future_access_mask & ~access_mask; } - -void Simulator::PrintVRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane) { - vregisters_[reg_code].NotifyRegisterLogged(); - - // The template is "# v{code}: 0x{rawbits} <- address". 
- PrintVRegisterRawHelper(reg_code); - if (format & kPrintRegAsFP) { - PrintVRegisterFPHelper(reg_code, - GetPrintRegLaneSizeInBytes(format), - GetPrintRegLaneCount(format), - lane); +void Simulator::PrintAccess(int code, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + if ((format & kPrintRegPartial) == 0) { + registers_[code].NotifyRegisterLogged(); } + // Scalar-format accesses use a simple format: + // "# {reg}: 0x{value} -> {address}" + + // Suppress the newline, so the access annotation goes on the same line. + PrintRegister(code, format, ""); fprintf(stream_, - " <- %s0x%016" PRIxPTR "%s\n", + " %s %s0x%016" PRIxPTR "%s\n", + op, clr_memory_address, address, clr_normal); } +void Simulator::PrintVAccess(int code, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); -void Simulator::PrintWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + // Scalar-format accesses use a simple format: + // "# v{code}: 0x{value} -> {address}" - // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy - // and readable, the value is aligned with the values in the register trace. - PrintRegisterRawHelper(reg_code, - Reg31IsZeroRegister, - GetPrintRegSizeInBytes(format)); + // Suppress the newline, so the access annotation goes on the same line. + PrintVRegister(code, format, ""); fprintf(stream_, - " -> %s0x%016" PRIxPTR "%s\n", + " %s %s0x%016" PRIxPTR "%s\n", + op, clr_memory_address, address, clr_normal); } +void Simulator::PrintVStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# v{code}: 0x{value}" + // "# ...: 0x{value}" + // "# ║ ╙─ {struct_value} -> {lowest_address}" + // "# ╙───── {struct_value} -> {highest_address}" + + uint16_t lane_mask = GetPrintRegLaneMask(format); + PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); + + int reg_size_in_bytes = GetPrintRegSizeInBytes(format); + int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); + for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) { + uint16_t access_mask = 1 << i; + VIXL_ASSERT((lane_mask & access_mask) != 0); + lane_mask = PrintPartialAccess(access_mask, + lane_mask, + reg_count, + lane_size_in_bytes, + op, + address + (i * reg_count)); + } +} + +void Simulator::PrintVSingleStructAccess(int rt_code, + int reg_count, + int lane, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# v{code}: 0x{value}" + // "# ...: 0x{value}" + // "# ╙───── {struct_value} -> {address}" + + int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); + uint16_t lane_mask = 1 << (lane * lane_size_in_bytes); + PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); + PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address); +} + +void Simulator::PrintVReplicatingStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# v{code}: 0x{value}" + // 
"# ...: 0x{value}" + // "# ╙─╨─╨─╨─ {struct_value} -> {address}" + + int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format); + uint16_t lane_mask = GetPrintRegLaneMask(format); + PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format); + PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address); +} + +void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // Scalar-format accesses are split into separate chunks, each of which uses a + // simple format: + // "# z{code}<127:0>: 0x{value} -> {address}" + // "# z{code}<255:128>: 0x{value} -> {address + 16}" + // "# z{code}<383:256>: 0x{value} -> {address + 32}" + // etc + + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { + // Suppress the newline, so the access annotation goes on the same line. + PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, ""); + fprintf(stream_, + " %s %s0x%016" PRIxPTR "%s\n", + op, + clr_memory_address, + address, + clr_normal); + address += kQRegSizeInBytes; + } +} + +void Simulator::PrintZStructAccess(int rt_code, + int reg_count, + const LogicPRegister& pg, + PrintRegisterFormat format, + int msize_in_bytes, + const char* op, + const LogicSVEAddressVector& addr) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // For example: + // "# z{code}<255:128>: 0x{value}" + // "# ...<255:128>: 0x{value}" + // "# ║ ╙─ {struct_value} -> {first_address}" + // "# ╙───── {struct_value} -> {last_address}" + + // We're going to print the register in parts, so force a partial format. + bool skip_inactive_chunks = (format & kPrintRegPartial) != 0; + format = GetPrintRegPartial(format); + + int esize_in_bytes = GetPrintRegLaneSizeInBytes(format); + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + int lanes_per_q = kQRegSizeInBytes / esize_in_bytes; + for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { + uint16_t pred = + pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format); + if ((pred == 0) && skip_inactive_chunks) continue; + + PrintZRegistersForStructuredAccess(rt_code, + q_index, + reg_count, + pred, + format); + if (pred == 0) { + // This register chunk has no active lanes. The loop below would print + // nothing, so leave a blank line to keep structures grouped together. + fprintf(stream_, "#\n"); + continue; + } + for (int i = 0; i < lanes_per_q; i++) { + uint16_t access = 1 << (i * esize_in_bytes); + int lane = (q_index * lanes_per_q) + i; + // Skip inactive lanes. + if ((pred & access) == 0) continue; + pred = PrintPartialAccess(access, + pred, + reg_count, + msize_in_bytes, + op, + addr.GetStructAddress(lane)); + } + } -void Simulator::PrintVWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane) { - // The templates: - // "# v{code}: 0x{rawbits} -> {address}" - // "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}". - // "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}" - // Because this trace doesn't represent a change to the source register's - // value, only the relevant part of the value is printed. To keep the trace - // tidy and readable, the raw value is aligned with the other values in the - // register trace. 
- int lane_count = GetPrintRegLaneCount(format); - int lane_size = GetPrintRegLaneSizeInBytes(format); - int reg_size = GetPrintRegSizeInBytes(format); - PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane); - if (format & kPrintRegAsFP) { - PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane); + // We print the whole register, even for stores. + for (int i = 0; i < reg_count; i++) { + vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged(); + } +} + +void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) { + VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0)); + + // Scalar-format accesses are split into separate chunks, each of which uses a + // simple format: + // "# p{code}<15:0>: 0b{value} -> {address}" + // "# p{code}<31:16>: 0b{value} -> {address + 2}" + // "# p{code}<47:32>: 0b{value} -> {address + 4}" + // etc + + int vl = GetVectorLengthInBits(); + VIXL_ASSERT((vl % kQRegSize) == 0); + for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) { + // Suppress the newline, so the access annotation goes on the same line. + PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, ""); + fprintf(stream_, + " %s %s0x%016" PRIxPTR "%s\n", + op, + clr_memory_address, + address, + clr_normal); + address += kQRegSizeInBytes; } - fprintf(stream_, - " -> %s0x%016" PRIxPTR "%s\n", - clr_memory_address, - address, - clr_normal); } +void Simulator::PrintRead(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + registers_[rt_code].NotifyRegisterLogged(); + PrintAccess(rt_code, format, "<-", address); +} + +void Simulator::PrintExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address) { + int reg_size_in_bytes = GetPrintRegSizeInBytes(format); + if (access_size_in_bytes == reg_size_in_bytes) { + // There is no extension here, so print a simple load. + PrintRead(rt_code, format, address); + return; + } + VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes); + + // For sign- and zero-extension, make it clear that the resulting register + // value is different from what is loaded from memory. + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + registers_[rt_code].NotifyRegisterLogged(); + PrintRegister(rt_code, format); + PrintPartialAccess(1, + 0, + 1, + access_size_in_bytes, + "<-", + address, + kXRegSizeInBytes); +} + +void Simulator::PrintVRead(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + vregisters_[rt_code].NotifyRegisterLogged(); + PrintVAccess(rt_code, format, "<-", address); +} + +void Simulator::PrintWrite(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + // Because this trace doesn't represent a change to the source register's + // value, only print the relevant part of the value. + format = GetPrintRegPartial(format); + VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + registers_[rt_code].NotifyRegisterLogged(); + PrintAccess(rt_code, format, "->", address); +} + +void Simulator::PrintVWrite(int rt_code, + PrintRegisterFormat format, + uintptr_t address) { + // Because this trace doesn't represent a change to the source register's + // value, only print the relevant part of the value. + format = GetPrintRegPartial(format); + // It only makes sense to write scalar values here. Vectors are handled by + // PrintVStructAccess. 
+ VIXL_ASSERT(GetPrintRegLaneCount(format) == 1); + PrintVAccess(rt_code, format, "->", address); +} void Simulator::PrintTakenBranch(const Instruction* target) { fprintf(stream_, @@ -1047,7 +1575,6 @@ void Simulator::PrintTakenBranch(const Instruction* target) { reinterpret_cast<uint64_t>(target)); } - // Visitors--------------------------------------------------------------------- @@ -1289,7 +1816,7 @@ void Simulator::VisitAddSubShifted(const Instruction* instr) { void Simulator::VisitAddSubImmediate(const Instruction* instr) { int64_t op2 = instr->GetImmAddSub() - << ((instr->GetShiftAddSub() == 1) ? 12 : 0); + << ((instr->GetImmAddSubShift() == 1) ? 12 : 0); AddSubHelper(instr, op2); } @@ -1489,7 +2016,7 @@ void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) { // Approximate load-acquire by issuing a full barrier after the load. __sync_synchronize(); - LogRead(address, rt, GetPrintRegisterFormat(element_size)); + LogRead(rt, GetPrintRegisterFormat(element_size), address); } @@ -1516,7 +2043,7 @@ void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) { Memory::Write<T>(address, ReadRegister<T>(rt)); - LogWrite(address, rt, GetPrintRegisterFormat(element_size)); + LogWrite(rt, GetPrintRegisterFormat(element_size), address); } @@ -1603,7 +2130,7 @@ void Simulator::VisitLoadStorePAC(const Instruction* instr) { WriteXRegister(dst, Memory::Read<uint64_t>(addr_ptr), NoRegLog); unsigned access_size = 1 << 3; - LogRead(addr_ptr, dst, GetPrintRegisterFormatForSize(access_size)); + LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr); } @@ -1624,49 +2151,65 @@ void Simulator::LoadStoreHelper(const Instruction* instr, unsigned srcdst = instr->GetRt(); uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode); + bool rt_is_vreg = false; + int extend_to_size = 0; LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask)); switch (op) { case LDRB_w: WriteWRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDRH_w: WriteWRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDR_w: WriteWRegister(srcdst, Memory::Read<uint32_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDR_x: WriteXRegister(srcdst, Memory::Read<uint64_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDRSB_w: WriteWRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDRSH_w: WriteWRegister(srcdst, Memory::Read<int16_t>(address), NoRegLog); + extend_to_size = kWRegSizeInBytes; break; case LDRSB_x: WriteXRegister(srcdst, Memory::Read<int8_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDRSH_x: WriteXRegister(srcdst, Memory::Read<int16_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDRSW_x: WriteXRegister(srcdst, Memory::Read<int32_t>(address), NoRegLog); + extend_to_size = kXRegSizeInBytes; break; case LDR_b: WriteBRegister(srcdst, Memory::Read<uint8_t>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_h: WriteHRegister(srcdst, Memory::Read<uint16_t>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_s: WriteSRegister(srcdst, Memory::Read<float>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_d: WriteDRegister(srcdst, Memory::Read<double>(address), NoRegLog); + rt_is_vreg = true; break; case LDR_q: WriteQRegister(srcdst, Memory::Read<qreg_t>(address), NoRegLog); + 
rt_is_vreg = true; break; case STRB_w: @@ -1683,18 +2226,23 @@ void Simulator::LoadStoreHelper(const Instruction* instr, break; case STR_b: Memory::Write<uint8_t>(address, ReadBRegister(srcdst)); + rt_is_vreg = true; break; case STR_h: Memory::Write<uint16_t>(address, ReadHRegisterBits(srcdst)); + rt_is_vreg = true; break; case STR_s: Memory::Write<float>(address, ReadSRegister(srcdst)); + rt_is_vreg = true; break; case STR_d: Memory::Write<double>(address, ReadDRegister(srcdst)); + rt_is_vreg = true; break; case STR_q: Memory::Write<qreg_t>(address, ReadQRegister(srcdst)); + rt_is_vreg = true; break; // Ignore prfm hint instructions. @@ -1705,22 +2253,25 @@ void Simulator::LoadStoreHelper(const Instruction* instr, VIXL_UNIMPLEMENTED(); } + // Print a detailed trace (including the memory address). + bool extend = (extend_to_size != 0); unsigned access_size = 1 << instr->GetSizeLS(); + unsigned result_size = extend ? extend_to_size : access_size; + PrintRegisterFormat print_format = + rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size) + : GetPrintRegisterFormatForSize(result_size); + if (instr->IsLoad()) { - if ((op == LDR_s) || (op == LDR_d)) { - LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); - } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) { - LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + if (rt_is_vreg) { + LogVRead(srcdst, print_format, address); } else { - LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + LogExtendingRead(srcdst, print_format, access_size, address); } } else if (instr->IsStore()) { - if ((op == STR_s) || (op == STR_d)) { - LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size)); - } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) { - LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + if (rt_is_vreg) { + LogVWrite(srcdst, print_format, address); } else { - LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size)); + LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address); } } else { VIXL_ASSERT(op == PRFM); @@ -1765,6 +2316,8 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, // 'rt' and 'rt2' can only be aliased for stores. VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2)); + bool rt_is_vreg = false; + bool sign_extend = false; switch (op) { // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We // will print a more detailed log. 
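To illustrate the extending-read tracing above (a sketch; the register and address values are invented): "ldrb w0, [x1]" has access_size == 1 but extend_to_size == kWRegSizeInBytes, so the load is logged via LogExtendingRead, which prints the whole 32-bit register result and then annotates only the byte that was actually read, roughly:

#      w0: 0x00000080
#               ╙─ 0x80 <- 0x00007ffe12345678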
@@ -1776,6 +2329,7 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case LDP_s: { WriteSRegister(rt, Memory::Read<float>(address), NoRegLog); WriteSRegister(rt2, Memory::Read<float>(address2), NoRegLog); + rt_is_vreg = true; break; } case LDP_x: { @@ -1786,16 +2340,19 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case LDP_d: { WriteDRegister(rt, Memory::Read<double>(address), NoRegLog); WriteDRegister(rt2, Memory::Read<double>(address2), NoRegLog); + rt_is_vreg = true; break; } case LDP_q: { WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog); WriteQRegister(rt2, Memory::Read<qreg_t>(address2), NoRegLog); + rt_is_vreg = true; break; } case LDPSW_x: { WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog); WriteXRegister(rt2, Memory::Read<int32_t>(address2), NoRegLog); + sign_extend = true; break; } case STP_w: { @@ -1806,6 +2363,7 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case STP_s: { Memory::Write<float>(address, ReadSRegister(rt)); Memory::Write<float>(address2, ReadSRegister(rt2)); + rt_is_vreg = true; break; } case STP_x: { @@ -1816,40 +2374,43 @@ void Simulator::LoadStorePairHelper(const Instruction* instr, case STP_d: { Memory::Write<double>(address, ReadDRegister(rt)); Memory::Write<double>(address2, ReadDRegister(rt2)); + rt_is_vreg = true; break; } case STP_q: { Memory::Write<qreg_t>(address, ReadQRegister(rt)); Memory::Write<qreg_t>(address2, ReadQRegister(rt2)); + rt_is_vreg = true; break; } default: VIXL_UNREACHABLE(); } - // Print a detailed trace (including the memory address) instead of the basic - // register:value trace generated by set_*reg(). + // Print a detailed trace (including the memory address). + unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size; + PrintRegisterFormat print_format = + rt_is_vreg ? 
GetPrintRegisterFormatForSizeTryFP(result_size) + : GetPrintRegisterFormatForSize(result_size); + if (instr->IsLoad()) { - if ((op == LDP_s) || (op == LDP_d)) { - LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); - LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); - } else if (op == LDP_q) { - LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + if (rt_is_vreg) { + LogVRead(rt, print_format, address); + LogVRead(rt2, print_format, address2); + } else if (sign_extend) { + LogExtendingRead(rt, print_format, element_size, address); + LogExtendingRead(rt2, print_format, element_size, address2); } else { - LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + LogRead(rt, print_format, address); + LogRead(rt2, print_format, address2); } } else { - if ((op == STP_s) || (op == STP_d)) { - LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size)); - LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size)); - } else if (op == STP_q) { - LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + if (rt_is_vreg) { + LogVWrite(rt, print_format, address); + LogVWrite(rt2, print_format, address2); } else { - LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt, print_format, address); + LogWrite(rt2, print_format, address2); } } @@ -1890,10 +2451,10 @@ void Simulator::CompareAndSwapHelper(const Instruction* instr) { __sync_synchronize(); } Memory::Write<T>(address, newvalue); - LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address); } - WriteRegister<T>(rs, data); - LogRead(address, rs, GetPrintRegisterFormatForSize(element_size)); + WriteRegister<T>(rs, data, NoRegLog); + LogRead(rs, GetPrintRegisterFormatForSize(element_size), address); } @@ -1904,7 +2465,7 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { unsigned rt = instr->GetRt(); unsigned rn = instr->GetRn(); - VIXL_ASSERT((rs % 2 == 0) && (rs % 2 == 0)); + VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0)); unsigned element_size = sizeof(T); uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer); @@ -1925,8 +2486,8 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { // associated with that location, even if the compare subsequently fails. local_monitor_.Clear(); - T data_high = Memory::Read<T>(address); - T data_low = Memory::Read<T>(address2); + T data_low = Memory::Read<T>(address); + T data_high = Memory::Read<T>(address2); if (is_acquire) { // Approximate load-acquire by issuing a full barrier after the load. 
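The next hunk adds CanReadMemory, which probes whether a host address range is readable by write()-ing it to a pipe. As a minimal, self-contained sketch of that technique (the helper name and error handling here are illustrative; POSIX write() reports an unreadable source buffer by returning -1 and setting errno to EFAULT, or EPERM on some kernels):

#include <errno.h>
#include <stddef.h>
#include <unistd.h>

// Probe readability of [addr, addr + size). Adequate for sizes below the
// pipe capacity; the simulator's helper below additionally handles short
// writes and drains the pipe so the probe can be reused.
static bool ProbeReadable(const void* addr, size_t size) {
  int fds[2];
  if (pipe(fds) != 0) return false;  // No pipe available; report unreadable.
  ssize_t result;
  do {
    result = write(fds[1], addr, size);
  } while ((result < 0) && (errno == EINTR));  // Retry if interrupted.
  bool readable = (result >= 0);
  close(fds[0]);
  close(fds[1]);
  return readable;
}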
@@ -1941,22 +2502,82 @@ void Simulator::CompareAndSwapPairHelper(const Instruction* instr) { __sync_synchronize(); } - Memory::Write<T>(address, newvalue_high); - Memory::Write<T>(address2, newvalue_low); + Memory::Write<T>(address, newvalue_low); + Memory::Write<T>(address2, newvalue_high); } - WriteRegister<T>(rs + 1, data_high); - WriteRegister<T>(rs, data_low); + WriteRegister<T>(rs + 1, data_high, NoRegLog); + WriteRegister<T>(rs, data_low, NoRegLog); - LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size)); - LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); + LogRead(rs, format, address); + LogRead(rs + 1, format, address2); if (same) { - LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size)); - LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt, format, address); + LogWrite(rt + 1, format, address2); + } +} + +bool Simulator::CanReadMemory(uintptr_t address, size_t size) { + // To simulate fault-tolerant loads, we need to know what host addresses we + // can access without generating a real fault. One way to do that is to + // attempt to `write()` the memory to a dummy pipe[1]. This is more portable + // and less intrusive than using (global) signal handlers. + // + // [1]: https://stackoverflow.com/questions/7134590 + + size_t written = 0; + bool can_read = true; + // `write` will normally return after one invocation, but it is allowed to + // handle only part of the operation, so wrap it in a loop. + while (can_read && (written < size)) { + ssize_t result = write(dummy_pipe_fd_[1], + reinterpret_cast<void*>(address + written), + size - written); + if (result > 0) { + written += result; + } else { + switch (result) { + case -EPERM: + case -EFAULT: + // The address range is not accessible. + // `write` is supposed to return -EFAULT in this case, but in practice + // it seems to return -EPERM, so we accept that too. + can_read = false; + break; + case -EINTR: + // The call was interrupted by a signal. Just try again. + break; + default: + // Any other error is fatal. + VIXL_ABORT(); + } + } + } + // Drain the read side of the pipe. If we don't do this, we'll leak memory as + // the dummy data is buffered. As before, we expect to drain the whole write + // in one invocation, but cannot guarantee that, so we wrap it in a loop. This + // function is primarily intended to implement SVE fault-tolerant loads, so + // the maximum Z register size is a good default buffer size. + char buffer[kZRegMaxSizeInBytes]; + while (written > 0) { + ssize_t result = read(dummy_pipe_fd_[0], + reinterpret_cast<void*>(buffer), + sizeof(buffer)); + // `read` blocks, and returns 0 only at EOF. We should not hit EOF until + // we've read everything that was written, so treat 0 as an error. + if (result > 0) { + VIXL_ASSERT(static_cast<size_t>(result) <= written); + written -= result; + } else { + // For -EINTR, just try again. We can't handle any other error. 
+ VIXL_CHECK(result == -EINTR); + } } -} + return can_read; +} void Simulator::PrintExclusiveAccessWarning() { if (print_exclusive_access_warning_) { @@ -1971,7 +2592,6 @@ void Simulator::PrintExclusiveAccessWarning() { } } - void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { LoadStoreExclusive op = static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask)); @@ -2045,30 +2665,35 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). // We will print a more detailed log. + unsigned reg_size = 0; switch (op) { case LDXRB_w: case LDAXRB_w: case LDARB_w: case LDLARB: WriteWRegister(rt, Memory::Read<uint8_t>(address), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXRH_w: case LDAXRH_w: case LDARH_w: case LDLARH: WriteWRegister(rt, Memory::Read<uint16_t>(address), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXR_w: case LDAXR_w: case LDAR_w: case LDLAR_w: WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXR_x: case LDAXR_x: case LDAR_x: case LDLAR_x: WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog); + reg_size = kXRegSizeInBytes; break; case LDXP_w: case LDAXP_w: @@ -2076,6 +2701,7 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { WriteWRegister(rt2, Memory::Read<uint32_t>(address + element_size), NoRegLog); + reg_size = kWRegSizeInBytes; break; case LDXP_x: case LDAXP_x: @@ -2083,6 +2709,7 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { WriteXRegister(rt2, Memory::Read<uint64_t>(address + element_size), NoRegLog); + reg_size = kXRegSizeInBytes; break; default: VIXL_UNREACHABLE(); @@ -2093,11 +2720,10 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { __sync_synchronize(); } - LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size); + LogExtendingRead(rt, format, element_size, address); if (is_pair) { - LogRead(address + element_size, - rt2, - GetPrintRegisterFormatForSize(element_size)); + LogExtendingRead(rt2, format, element_size, address + element_size); } } else { if (is_acquire_release) { @@ -2161,11 +2787,11 @@ void Simulator::VisitLoadStoreExclusive(const Instruction* instr) { VIXL_UNREACHABLE(); } - LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = + GetPrintRegisterFormatForSize(element_size); + LogWrite(rt, format, address); if (is_pair) { - LogWrite(address + element_size, - rt2, - GetPrintRegisterFormatForSize(element_size)); + LogWrite(rt2, format, address + element_size); } } } @@ -2232,8 +2858,9 @@ void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) { Memory::Write<T>(address, result); WriteRegister<T>(rt, data, NoRegLog); - LogRead(address, rt, GetPrintRegisterFormatForSize(element_size)); - LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); + LogRead(rt, format, address); + LogWrite(rs, format, address); } template <typename T> @@ -2264,8 +2891,9 @@ void Simulator::AtomicMemorySwapHelper(const Instruction* instr) { WriteRegister<T>(rt, data); - LogRead(address, rt, GetPrintRegisterFormat(element_size)); - LogWrite(address, rs, GetPrintRegisterFormat(element_size)); + PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size); + LogRead(rt, format, address); + 
LogWrite(rs, format, address); } template <typename T> @@ -2283,7 +2911,7 @@ void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) { // Approximate load-acquire by issuing a full barrier after the load. __sync_synchronize(); - LogRead(address, rt, GetPrintRegisterFormat(element_size)); + LogRead(rt, GetPrintRegisterFormatForSize(element_size), address); } #define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \ @@ -2400,27 +3028,27 @@ void Simulator::VisitLoadLiteral(const Instruction* instr) { // print a more detailed log. case LDR_w_lit: WriteWRegister(rt, Memory::Read<uint32_t>(address), NoRegLog); - LogRead(address, rt, kPrintWReg); + LogRead(rt, kPrintWReg, address); break; case LDR_x_lit: WriteXRegister(rt, Memory::Read<uint64_t>(address), NoRegLog); - LogRead(address, rt, kPrintXReg); + LogRead(rt, kPrintXReg, address); break; case LDR_s_lit: WriteSRegister(rt, Memory::Read<float>(address), NoRegLog); - LogVRead(address, rt, kPrintSReg); + LogVRead(rt, kPrintSRegFP, address); break; case LDR_d_lit: WriteDRegister(rt, Memory::Read<double>(address), NoRegLog); - LogVRead(address, rt, kPrintDReg); + LogVRead(rt, kPrintDRegFP, address); break; case LDR_q_lit: WriteQRegister(rt, Memory::Read<qreg_t>(address), NoRegLog); - LogVRead(address, rt, kPrintReg1Q); + LogVRead(rt, kPrintReg1Q, address); break; case LDRSW_x_lit: WriteXRegister(rt, Memory::Read<int32_t>(address), NoRegLog); - LogRead(address, rt, kPrintWReg); + LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address); break; // Ignore prfm hint instructions. @@ -2795,40 +3423,6 @@ void Simulator::VisitDataProcessing2Source(const Instruction* instr) { } -// The algorithm used is adapted from the one described in section 8.2 of -// Hacker's Delight, by Henry S. Warren, Jr. -template <typename T> -static int64_t MultiplyHigh(T u, T v) { - uint64_t u0, v0, w0, u1, v1, w1, w2, t; - uint64_t sign_mask = UINT64_C(0x8000000000000000); - uint64_t sign_ext = 0; - if (std::numeric_limits<T>::is_signed) { - sign_ext = UINT64_C(0xffffffff00000000); - } - - VIXL_ASSERT(sizeof(u) == sizeof(uint64_t)); - VIXL_ASSERT(sizeof(u) == sizeof(u0)); - - u0 = u & 0xffffffff; - u1 = u >> 32 | (((u & sign_mask) != 0) ? sign_ext : 0); - v0 = v & 0xffffffff; - v1 = v >> 32 | (((v & sign_mask) != 0) ? sign_ext : 0); - - w0 = u0 * v0; - t = u1 * v0 + (w0 >> 32); - - w1 = t & 0xffffffff; - w2 = t >> 32 | (((t & sign_mask) != 0) ? sign_ext : 0); - w1 = u0 * v1 + w1; - w1 = w1 >> 32 | (((w1 & sign_mask) != 0) ? sign_ext : 0); - - uint64_t value = u1 * v1 + w2 + w1; - int64_t result; - memcpy(&result, &value, sizeof(result)); - return result; -} - - void Simulator::VisitDataProcessing3Source(const Instruction* instr) { unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize; @@ -2864,12 +3458,13 @@ void Simulator::VisitDataProcessing3Source(const Instruction* instr) { result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32); break; case UMULH_x: - result = MultiplyHigh(ReadRegister<uint64_t>(instr->GetRn()), - ReadRegister<uint64_t>(instr->GetRm())); + result = + internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()), + ReadRegister<uint64_t>(instr->GetRm())); break; case SMULH_x: - result = MultiplyHigh(ReadXRegister(instr->GetRn()), - ReadXRegister(instr->GetRm())); + result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()), + ReadXRegister(instr->GetRm())); break; default: VIXL_UNIMPLEMENTED(); @@ -2936,9 +3531,10 @@ void Simulator::VisitExtract(const Instruction* instr) { unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? 
kXRegSize : kWRegSize; uint64_t low_res = static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb; - uint64_t high_res = - (lsb == 0) ? 0 : ReadRegister<uint64_t>(reg_size, instr->GetRn()) - << (reg_size - lsb); + uint64_t high_res = (lsb == 0) + ? 0 + : ReadRegister<uint64_t>(reg_size, instr->GetRn()) + << (reg_size - lsb); WriteRegister(reg_size, instr->GetRd(), low_res | high_res); } @@ -3948,8 +4544,8 @@ void Simulator::VisitSystem(const Instruction* instr) { break; case RNDR: case RNDRRS: { - uint64_t high = jrand48(rndr_state_); - uint64_t low = jrand48(rndr_state_); + uint64_t high = jrand48(rand_state_); + uint64_t low = jrand48(rand_state_); uint64_t rand_num = (high << 32) | (low & 0xffffffff); WriteXRegister(instr->GetRt(), rand_num); // Simulate successful random number generation. @@ -4530,10 +5126,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) { fminnm(vf, rd, rn, rm); break; case NEON_FMLA: - fmla(vf, rd, rn, rm); + fmla(vf, rd, rd, rn, rm); break; case NEON_FMLS: - fmls(vf, rd, rn, rm); + fmls(vf, rd, rd, rn, rm); break; case NEON_FMULX: fmulx(vf, rd, rn, rm); @@ -4624,10 +5220,10 @@ void Simulator::VisitNEON3Same(const Instruction* instr) { cmptst(vf, rd, rn, rm); break; case NEON_MLS: - mls(vf, rd, rn, rm); + mls(vf, rd, rd, rn, rm); break; case NEON_MLA: - mla(vf, rd, rn, rm); + mla(vf, rd, rd, rn, rm); break; case NEON_MUL: mul(vf, rd, rn, rm); @@ -4754,13 +5350,11 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) { B(vf, rd, rn, rm); \ break; SIM_FUNC(FMAXNM, fmaxnm); - SIM_FUNC(FMLA, fmla); SIM_FUNC(FADD, fadd); SIM_FUNC(FMULX, fmulx); SIM_FUNC(FMAX, fmax); SIM_FUNC(FRECPS, frecps); SIM_FUNC(FMINNM, fminnm); - SIM_FUNC(FMLS, fmls); SIM_FUNC(FSUB, fsub); SIM_FUNC(FMIN, fmin); SIM_FUNC(FRSQRTS, frsqrts); @@ -4773,6 +5367,12 @@ void Simulator::VisitNEON3SameFP16(const Instruction* instr) { SIM_FUNC(FABD, fabd); SIM_FUNC(FMINP, fminp); #undef SIM_FUNC + case NEON_FMLA_H: + fmla(vf, rd, rd, rn, rm); + break; + case NEON_FMLS_H: + fmls(vf, rd, rd, rn, rm); + break; case NEON_FCMEQ_H: fcmp(vf, rd, rn, rm, eq); break; @@ -4803,7 +5403,7 @@ void Simulator::VisitNEON3SameExtra(const Instruction* instr) { VectorFormat vf = nfd.GetVectorFormat(); if (instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) { rot = instr->GetImmRotFcmlaVec(); - fcmla(vf, rd, rn, rm, rot); + fcmla(vf, rd, rn, rm, rd, rot); } else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) { rot = instr->GetImmRotFcadd(); fcadd(vf, rd, rn, rm, rot); @@ -5347,7 +5947,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters; addr[i] = addr_base + (i * reg_size); } - int count = 1; + int struct_parts = 1; + int reg_count = 1; bool log_read = true; // Bit 23 determines whether this is an offset or post-index addressing mode. 
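A note on the fmla/fmls, mla/mls and fcmla changes above: each call now passes the destination register twice, once as the output and once as an explicit accumulator input. The following minimal sketch (plain scalar C++ with hypothetical helper names, not VIXL's SimVRegister API) shows the two shapes; the extra operand is what lets the SVE handlers later in this patch accumulate into a register other than the destination, as in fmla(vform, result, zd, zn, zm).

  #include <cstddef>

  // Old, destructive shape: the accumulator is implicitly the destination.
  //   d[i] = d[i] + (n[i] * m[i])
  void mla_destructive(float* d, const float* n, const float* m,
                       size_t lanes) {
    for (size_t i = 0; i < lanes; i++) d[i] += n[i] * m[i];
  }

  // New, non-destructive shape: the accumulator `a` is an explicit input.
  //   d[i] = a[i] + (n[i] * m[i])
  // Passing a == d reproduces the old behaviour, which is exactly what the
  // rewritten calls of the form fmla(vf, rd, rd, rn, rm) do.
  void mla(float* d, const float* a, const float* n, const float* m,
           size_t lanes) {
    for (size_t i = 0; i < lanes; i++) d[i] = a[i] + (n[i] * m[i]);
  }
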
@@ -5363,17 +5964,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_LD1_4v: case NEON_LD1_4v_post: ld1(vf, ReadVRegister(reg[3]), addr[3]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_LD1_3v: case NEON_LD1_3v_post: ld1(vf, ReadVRegister(reg[2]), addr[2]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_LD1_2v: case NEON_LD1_2v_post: ld1(vf, ReadVRegister(reg[1]), addr[1]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_LD1_1v: case NEON_LD1_1v_post: @@ -5382,17 +5983,17 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_ST1_4v: case NEON_ST1_4v_post: st1(vf, ReadVRegister(reg[3]), addr[3]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_ST1_3v: case NEON_ST1_3v_post: st1(vf, ReadVRegister(reg[2]), addr[2]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_ST1_2v: case NEON_ST1_2v_post: st1(vf, ReadVRegister(reg[1]), addr[1]); - count++; + reg_count++; VIXL_FALLTHROUGH(); case NEON_ST1_1v: case NEON_ST1_1v_post: @@ -5402,12 +6003,14 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, case NEON_LD2_post: case NEON_LD2: ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]); - count = 2; + struct_parts = 2; + reg_count = 2; break; case NEON_ST2: case NEON_ST2_post: st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0]); - count = 2; + struct_parts = 2; + reg_count = 2; log_read = false; break; case NEON_LD3_post: @@ -5417,7 +6020,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[1]), ReadVRegister(reg[2]), addr[0]); - count = 3; + struct_parts = 3; + reg_count = 3; break; case NEON_ST3: case NEON_ST3_post: @@ -5426,7 +6030,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[1]), ReadVRegister(reg[2]), addr[0]); - count = 3; + struct_parts = 3; + reg_count = 3; log_read = false; break; case NEON_ST4: @@ -5437,7 +6042,8 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[2]), ReadVRegister(reg[3]), addr[0]); - count = 4; + struct_parts = 4; + reg_count = 4; log_read = false; break; case NEON_LD4_post: @@ -5448,22 +6054,31 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, ReadVRegister(reg[2]), ReadVRegister(reg[3]), addr[0]); - count = 4; + struct_parts = 4; + reg_count = 4; break; default: VIXL_UNIMPLEMENTED(); } - // Explicitly log the register update whilst we have type information. - for (int i = 0; i < count; i++) { - // For de-interleaving loads, only print the base address. - int lane_size = LaneSizeInBytesFromFormat(vf); - PrintRegisterFormat format = GetPrintRegisterFormatTryFP( - GetPrintRegisterFormatForSize(reg_size, lane_size)); + bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites(); + if (do_trace) { + PrintRegisterFormat print_format = + GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); + const char* op; if (log_read) { - LogVRead(addr_base, reg[i], format); + op = "<-"; } else { - LogVWrite(addr_base, reg[i], format); + op = "->"; + // Stores don't represent a change to the source register's value, so only + // print the relevant part of the value. 
+ print_format = GetPrintRegPartial(print_format); + } + + VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1)); + for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) { + uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf)); + PrintVStructAccess(reg[s], struct_parts, print_format, op, address); } } @@ -5471,7 +6086,7 @@ void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr, int rm = instr->GetRm(); // The immediate post index addressing mode is indicated by rm = 31. // The immediate is implied by the number of vector registers used. - addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count + addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count) : ReadXRegister(rm); WriteXRegister(instr->GetRn(), addr_base); } else { @@ -5507,6 +6122,8 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, // and PostIndex addressing. bool do_load = false; + bool replicating = false; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); VectorFormat vf_t = nfd.GetVectorFormat(); @@ -5581,99 +6198,67 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, } case NEON_LD1R: - case NEON_LD1R_post: { - vf = vf_t; - ld1r(vf, ReadVRegister(rt), addr); - do_load = true; - break; - } - + case NEON_LD1R_post: case NEON_LD2R: - case NEON_LD2R_post: { - vf = vf_t; - int rt2 = (rt + 1) % kNumberOfVRegisters; - ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr); - do_load = true; - break; - } - + case NEON_LD2R_post: case NEON_LD3R: - case NEON_LD3R_post: { - vf = vf_t; - int rt2 = (rt + 1) % kNumberOfVRegisters; - int rt3 = (rt2 + 1) % kNumberOfVRegisters; - ld3r(vf, ReadVRegister(rt), ReadVRegister(rt2), ReadVRegister(rt3), addr); - do_load = true; - break; - } - + case NEON_LD3R_post: case NEON_LD4R: - case NEON_LD4R_post: { + case NEON_LD4R_post: vf = vf_t; - int rt2 = (rt + 1) % kNumberOfVRegisters; - int rt3 = (rt2 + 1) % kNumberOfVRegisters; - int rt4 = (rt3 + 1) % kNumberOfVRegisters; - ld4r(vf, - ReadVRegister(rt), - ReadVRegister(rt2), - ReadVRegister(rt3), - ReadVRegister(rt4), - addr); do_load = true; + replicating = true; break; - } + default: VIXL_UNIMPLEMENTED(); } - PrintRegisterFormat print_format = - GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); - // Make sure that the print_format only includes a single lane. 
- print_format = - static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask); - - int esize = LaneSizeInBytesFromFormat(vf); int index_shift = LaneSizeInBytesLog2FromFormat(vf); int lane = instr->GetNEONLSIndex(index_shift); - int scale = 0; + int reg_count = 0; int rt2 = (rt + 1) % kNumberOfVRegisters; int rt3 = (rt2 + 1) % kNumberOfVRegisters; int rt4 = (rt3 + 1) % kNumberOfVRegisters; switch (instr->Mask(NEONLoadStoreSingleLenMask)) { case NEONLoadStoreSingle1: - scale = 1; - if (do_load) { + reg_count = 1; + if (replicating) { + VIXL_ASSERT(do_load); + ld1r(vf, ReadVRegister(rt), addr); + } else if (do_load) { ld1(vf, ReadVRegister(rt), lane, addr); - LogVRead(addr, rt, print_format, lane); } else { st1(vf, ReadVRegister(rt), lane, addr); - LogVWrite(addr, rt, print_format, lane); } break; case NEONLoadStoreSingle2: - scale = 2; - if (do_load) { + reg_count = 2; + if (replicating) { + VIXL_ASSERT(do_load); + ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr); + } else if (do_load) { ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr); - LogVRead(addr, rt, print_format, lane); - LogVRead(addr + esize, rt2, print_format, lane); } else { st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr); - LogVWrite(addr, rt, print_format, lane); - LogVWrite(addr + esize, rt2, print_format, lane); } break; case NEONLoadStoreSingle3: - scale = 3; - if (do_load) { + reg_count = 3; + if (replicating) { + VIXL_ASSERT(do_load); + ld3r(vf, + ReadVRegister(rt), + ReadVRegister(rt2), + ReadVRegister(rt3), + addr); + } else if (do_load) { ld3(vf, ReadVRegister(rt), ReadVRegister(rt2), ReadVRegister(rt3), lane, addr); - LogVRead(addr, rt, print_format, lane); - LogVRead(addr + esize, rt2, print_format, lane); - LogVRead(addr + (2 * esize), rt3, print_format, lane); } else { st3(vf, ReadVRegister(rt), @@ -5681,14 +6266,19 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt3), lane, addr); - LogVWrite(addr, rt, print_format, lane); - LogVWrite(addr + esize, rt2, print_format, lane); - LogVWrite(addr + (2 * esize), rt3, print_format, lane); } break; case NEONLoadStoreSingle4: - scale = 4; - if (do_load) { + reg_count = 4; + if (replicating) { + VIXL_ASSERT(do_load); + ld4r(vf, + ReadVRegister(rt), + ReadVRegister(rt2), + ReadVRegister(rt3), + ReadVRegister(rt4), + addr); + } else if (do_load) { ld4(vf, ReadVRegister(rt), ReadVRegister(rt2), @@ -5696,10 +6286,6 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt4), lane, addr); - LogVRead(addr, rt, print_format, lane); - LogVRead(addr + esize, rt2, print_format, lane); - LogVRead(addr + (2 * esize), rt3, print_format, lane); - LogVRead(addr + (3 * esize), rt4, print_format, lane); } else { st4(vf, ReadVRegister(rt), @@ -5708,22 +6294,38 @@ void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr, ReadVRegister(rt4), lane, addr); - LogVWrite(addr, rt, print_format, lane); - LogVWrite(addr + esize, rt2, print_format, lane); - LogVWrite(addr + (2 * esize), rt3, print_format, lane); - LogVWrite(addr + (3 * esize), rt4, print_format, lane); } break; default: VIXL_UNIMPLEMENTED(); } + // Trace registers and/or memory writes. 
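+  // Loads are traced as register updates, since they change the destination
+  // registers; stores do not, so they are traced as memory writes instead.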
+ PrintRegisterFormat print_format = + GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf)); + if (do_load) { + if (ShouldTraceVRegs()) { + if (replicating) { + PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr); + } else { + PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr); + } + } + } else { + if (ShouldTraceWrites()) { + // Stores don't represent a change to the source register's value, so only + // print the relevant part of the value. + print_format = GetPrintRegPartial(print_format); + PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr); + } + } + if (addr_mode == PostIndex) { int rm = instr->GetRm(); int lane_size = LaneSizeInBytesFromFormat(vf); WriteXRegister(instr->GetRn(), - addr + - ((rm == 31) ? (scale * lane_size) : ReadXRegister(rm))); + addr + ((rm == 31) ? (reg_count * lane_size) + : ReadXRegister(rm))); } } @@ -6421,10 +7023,10 @@ void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) { NEONFormatDecoder nfd(instr, &map); VectorFormat vf = nfd.GetVectorFormat(); - int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh()); - int immhimmb = instr->GetImmNEONImmhImmb(); - int right_shift = (16 << highestSetBit) - immhimmb; - int left_shift = immhimmb - (8 << highestSetBit); + int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh()); + int immh_immb = instr->GetImmNEONImmhImmb(); + int right_shift = (16 << highest_set_bit) - immh_immb; + int left_shift = immh_immb - (8 << highest_set_bit); switch (instr->Mask(NEONScalarShiftImmediateMask)) { case NEON_SHL_scalar: shl(vf, rd, rn, left_shift); @@ -6529,10 +7131,10 @@ void Simulator::VisitNEONShiftImmediate(const Instruction* instr) { {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}}; VectorFormat vf_l = nfd.GetVectorFormat(&map_l); - int highestSetBit = HighestSetBitPosition(instr->GetImmNEONImmh()); - int immhimmb = instr->GetImmNEONImmhImmb(); - int right_shift = (16 << highestSetBit) - immhimmb; - int left_shift = immhimmb - (8 << highestSetBit); + int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh()); + int immh_immb = instr->GetImmNEONImmhImmb(); + int right_shift = (16 << highest_set_bit) - immh_immb; + int left_shift = immh_immb - (8 << highest_set_bit); switch (instr->Mask(NEONShiftImmediateMask)) { case NEON_SHL: @@ -6741,6 +7343,4356 @@ void Simulator::VisitNEONPerm(const Instruction* instr) { } } +void Simulator::VisitSVEAddressGeneration(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister temp; + + VectorFormat vform = kFormatVnD; + mov(vform, temp, zm); + + switch (instr->Mask(SVEAddressGenerationMask)) { + case ADR_z_az_d_s32_scaled: + sxt(vform, temp, temp, kSRegSize); + break; + case ADR_z_az_d_u32_scaled: + uxt(vform, temp, temp, kSRegSize); + break; + case ADR_z_az_s_same_scaled: + vform = kFormatVnS; + break; + case ADR_z_az_d_same_scaled: + // Nothing to do. 
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  int shift_amount = instr->ExtractBits(11, 10);
+  shl(vform, temp, temp, shift_amount);
+  add(vform, zd, zn, temp);
+}
+
+void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
+    const Instruction* instr) {
+  Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
+  switch (op) {
+    case AND_z_zi:
+    case EOR_z_zi:
+    case ORR_z_zi: {
+      int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+      uint64_t imm = instr->GetSVEImmLogical();
+      // A valid immediate must have at least one bit set.
+      VIXL_ASSERT(imm != 0);
+      SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
+                              op),
+                          SVEFormatFromLaneSizeInBytesLog2(lane_size),
+                          ReadVRegister(instr->GetRd()),
+                          imm);
+      break;
+    }
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
+  switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
+    case DUPM_z_i: {
+      /* DUPM uses the same lane size and immediate encoding as bitwise logical
+       * immediate instructions. */
+      int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
+      uint64_t imm = instr->GetSVEImmLogical();
+      VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+      dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
+      break;
+    }
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
+
+  LogicalOp logical_op;
+  switch (op) {
+    case AND_z_zz:
+      logical_op = AND;
+      break;
+    case BIC_z_zz:
+      logical_op = BIC;
+      break;
+    case EOR_z_zz:
+      logical_op = EOR;
+      break;
+    case ORR_z_zz:
+      logical_op = ORR;
+      break;
+    default:
+      logical_op = LogicalOpMask;
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+  // Lane size of registers is irrelevant to the bitwise operations, so perform
+  // the operation on D-sized lanes.
+  SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
+}
+
+void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
+  SimVRegister& zdn = ReadVRegister(instr->GetRd());
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+  SimVRegister scratch;
+  SimVRegister result;
+
+  bool for_division = false;
+  Shift shift_op = NO_SHIFT;
+  switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
+    case ASRD_z_p_zi:
+      shift_op = ASR;
+      for_division = true;
+      break;
+    case ASR_z_p_zi:
+      shift_op = ASR;
+      break;
+    case LSL_z_p_zi:
+      shift_op = LSL;
+      break;
+    case LSR_z_p_zi:
+      shift_op = LSR;
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  std::pair<int, int> shift_and_lane_size =
+      instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
+  unsigned lane_size = shift_and_lane_size.second;
+  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+  int shift_dist = shift_and_lane_size.first;
+
+  if ((shift_op == ASR) && for_division) {
+    asrd(vform, result, zdn, shift_dist);
+  } else {
+    if (shift_op == LSL) {
+      // Shift distance is computed differently for LSL. Convert the result.
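+      // For LSL the encoded distance is (lane size in bits) - shift, so the
+      // conversion below recovers the LSL amount; e.g. for H lanes
+      // (8 << 1 == 16), an encoded distance of 13 denotes LSL #3.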
+ shift_dist = (8 << lane_size) - shift_dist; + } + dup_immediate(vform, scratch, shift_dist); + SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false); + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseShiftByVector_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + SimVRegister result; + SimVRegister shiftand; // Vector to be shifted. + SimVRegister shiftor; // Vector shift amount. + + Shift shift_op = ASR; + mov(vform, shiftand, zdn); + mov(vform, shiftor, zm); + + switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) { + case ASRR_z_p_zz: + mov(vform, shiftand, zm); + mov(vform, shiftor, zdn); + VIXL_FALLTHROUGH(); + case ASR_z_p_zz: + break; + case LSLR_z_p_zz: + mov(vform, shiftand, zm); + mov(vform, shiftor, zdn); + VIXL_FALLTHROUGH(); + case LSL_z_p_zz: + shift_op = LSL; + break; + case LSRR_z_p_zz: + mov(vform, shiftand, zm); + mov(vform, shiftor, zdn); + VIXL_FALLTHROUGH(); + case LSR_z_p_zz: + shift_op = LSR; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + SVEBitwiseShiftHelper(shift_op, + vform, + result, + shiftand, + shiftor, + /* is_wide_elements = */ false); + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + SimVRegister result; + Shift shift_op = ASR; + + switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) { + case ASR_z_p_zw: + break; + case LSL_z_p_zw: + shift_op = LSL; + break; + case LSR_z_p_zw: + shift_op = LSR; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + SVEBitwiseShiftHelper(shift_op, + vform, + result, + zdn, + zm, + /* is_wide_elements = */ true); + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + Shift shift_op; + switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { + case ASR_z_zi: + case ASR_z_zw: + shift_op = ASR; + break; + case LSL_z_zi: + case LSL_z_zw: + shift_op = LSL; + break; + case LSR_z_zi: + case LSR_z_zw: + shift_op = LSR; + break; + default: + shift_op = NO_SHIFT; + VIXL_UNIMPLEMENTED(); + break; + } + + switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) { + case ASR_z_zi: + case LSL_z_zi: + case LSR_z_zi: { + SimVRegister scratch; + std::pair<int, int> shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + unsigned lane_size = shift_and_lane_size.second; + VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size); + int shift_dist = shift_and_lane_size.first; + if (shift_op == LSL) { + // Shift distance is computed differently for LSL. Convert the result. 
+ shift_dist = (8 << lane_size) - shift_dist; + } + dup_immediate(vform, scratch, shift_dist); + SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false); + break; + } + case ASR_z_zw: + case LSL_z_zw: + case LSR_z_zw: + SVEBitwiseShiftHelper(shift_op, + instr->GetSVEVectorFormat(), + zd, + zn, + ReadVRegister(instr->GetRm()), + true); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) { + // Although the instructions have a separate encoding class, the lane size is + // encoded in the same way as most other SVE instructions. + VectorFormat vform = instr->GetSVEVectorFormat(); + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) { + case DECB_r_rs: + case DECD_r_rs: + case DECH_r_rs: + case DECW_r_rs: + count = -count; + break; + case INCB_r_rs: + case INCD_r_rs: + case INCH_r_rs: + case INCW_r_rs: + // Nothing to do. + break; + default: + VIXL_UNIMPLEMENTED(); + return; + } + + WriteXRegister(instr->GetRd(), + IncDecN(ReadXRegister(instr->GetRd()), + count * multiplier, + kXRegSize)); +} + +void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + switch (instr->Mask(SVEIncDecVectorByElementCountMask)) { + case DECD_z_zs: + case DECH_z_zs: + case DECW_z_zs: + count = -count; + break; + case INCD_z_zs: + case INCH_z_zs: + case INCW_z_zs: + // Nothing to do. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + dup_immediate(vform, + scratch, + IncDecN(0, + count * multiplier, + LaneSizeInBitsFromFormat(vform))); + add(vform, zd, zd, scratch); +} + +void Simulator::VisitSVESaturatingIncDecRegisterByElementCount( + const Instruction* instr) { + // Although the instructions have a separate encoding class, the lane size is + // encoded in the same way as most other SVE instructions. 
+ VectorFormat vform = instr->GetSVEVectorFormat(); + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + unsigned width = kXRegSize; + bool is_signed = false; + + switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) { + case SQDECB_r_rs_sx: + case SQDECD_r_rs_sx: + case SQDECH_r_rs_sx: + case SQDECW_r_rs_sx: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case SQDECB_r_rs_x: + case SQDECD_r_rs_x: + case SQDECH_r_rs_x: + case SQDECW_r_rs_x: + is_signed = true; + count = -count; + break; + case SQINCB_r_rs_sx: + case SQINCD_r_rs_sx: + case SQINCH_r_rs_sx: + case SQINCW_r_rs_sx: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case SQINCB_r_rs_x: + case SQINCD_r_rs_x: + case SQINCH_r_rs_x: + case SQINCW_r_rs_x: + is_signed = true; + break; + case UQDECB_r_rs_uw: + case UQDECD_r_rs_uw: + case UQDECH_r_rs_uw: + case UQDECW_r_rs_uw: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case UQDECB_r_rs_x: + case UQDECD_r_rs_x: + case UQDECH_r_rs_x: + case UQDECW_r_rs_x: + count = -count; + break; + case UQINCB_r_rs_uw: + case UQINCD_r_rs_uw: + case UQINCH_r_rs_uw: + case UQINCW_r_rs_uw: + width = kWRegSize; + VIXL_FALLTHROUGH(); + case UQINCB_r_rs_x: + case UQINCD_r_rs_x: + case UQINCH_r_rs_x: + case UQINCW_r_rs_x: + // Nothing to do. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + WriteXRegister(instr->GetRd(), + IncDecN(ReadXRegister(instr->GetRd()), + count * multiplier, + width, + true, + is_signed)); +} + +void Simulator::VisitSVESaturatingIncDecVectorByElementCount( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + dup_immediate(vform, + scratch, + IncDecN(0, + count * multiplier, + LaneSizeInBitsFromFormat(vform))); + + switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) { + case SQDECD_z_zs: + case SQDECH_z_zs: + case SQDECW_z_zs: + sub(vform, zd, zd, scratch).SignedSaturate(vform); + break; + case SQINCD_z_zs: + case SQINCH_z_zs: + case SQINCW_z_zs: + add(vform, zd, zd, scratch).SignedSaturate(vform); + break; + case UQDECD_z_zs: + case UQDECH_z_zs: + case UQDECW_z_zs: + sub(vform, zd, zd, scratch).UnsignedSaturate(vform); + break; + case UQINCD_z_zs: + case UQINCH_z_zs: + case UQINCW_z_zs: + add(vform, zd, zd, scratch).UnsignedSaturate(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEElementCount(const Instruction* instr) { + switch (instr->Mask(SVEElementCountMask)) { + case CNTB_r_s: + case CNTD_r_s: + case CNTH_r_s: + case CNTW_r_s: + // All handled below. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + // Although the instructions are separated, the lane size is encoded in the + // same way as most other SVE instructions. 
+ VectorFormat vform = instr->GetSVEVectorFormat(); + + int pattern = instr->GetImmSVEPredicateConstraint(); + int count = GetPredicateConstraintLaneCount(vform, pattern); + int multiplier = instr->ExtractBits(19, 16) + 1; + WriteXRegister(instr->GetRd(), count * multiplier); +} + +void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + switch (instr->Mask(SVEFPAccumulatingReductionMask)) { + case FADDA_v_p_z: + fadda(vform, vdn, pg, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + SimVRegister result; + + switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) { + case FABD_z_p_zz: + fabd(vform, result, zdn, zm); + break; + case FADD_z_p_zz: + fadd(vform, result, zdn, zm); + break; + case FDIVR_z_p_zz: + fdiv(vform, result, zm, zdn); + break; + case FDIV_z_p_zz: + fdiv(vform, result, zdn, zm); + break; + case FMAXNM_z_p_zz: + fmaxnm(vform, result, zdn, zm); + break; + case FMAX_z_p_zz: + fmax(vform, result, zdn, zm); + break; + case FMINNM_z_p_zz: + fminnm(vform, result, zdn, zm); + break; + case FMIN_z_p_zz: + fmin(vform, result, zdn, zm); + break; + case FMULX_z_p_zz: + fmulx(vform, result, zdn, zm); + break; + case FMUL_z_p_zz: + fmul(vform, result, zdn, zm); + break; + case FSCALE_z_p_zz: + fscale(vform, result, zdn, zm); + break; + case FSUBR_z_p_zz: + fsub(vform, result, zm, zdn); + break; + case FSUB_z_p_zz: + fsub(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEFPArithmeticWithImm_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + int i1 = instr->ExtractBit(5); + SimVRegister add_sub_imm, min_max_imm, mul_imm; + uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5); + uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0); + uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0); + dup_immediate(vform, add_sub_imm, i1 ? one : half); + dup_immediate(vform, min_max_imm, i1 ? one : 0); + dup_immediate(vform, mul_imm, i1 ? 
two : half); + + switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) { + case FADD_z_p_zs: + fadd(vform, result, zdn, add_sub_imm); + break; + case FMAXNM_z_p_zs: + fmaxnm(vform, result, zdn, min_max_imm); + break; + case FMAX_z_p_zs: + fmax(vform, result, zdn, min_max_imm); + break; + case FMINNM_z_p_zs: + fminnm(vform, result, zdn, min_max_imm); + break; + case FMIN_z_p_zs: + fmin(vform, result, zdn, min_max_imm); + break; + case FMUL_z_p_zs: + fmul(vform, result, zdn, mul_imm); + break; + case FSUBR_z_p_zs: + fsub(vform, result, add_sub_imm, zdn); + break; + case FSUB_z_p_zs: + fsub(vform, result, zdn, add_sub_imm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) { + case FTMAD_z_zzi: + ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) { + case FADD_z_zz: + fadd(vform, zd, zn, zm); + break; + case FMUL_z_zz: + fmul(vform, zd, zn, zm); + break; + case FRECPS_z_zz: + frecps(vform, zd, zn, zm); + break; + case FRSQRTS_z_zz: + frsqrts(vform, zd, zn, zm); + break; + case FSUB_z_zz: + fsub(vform, zd, zn, zm); + break; + case FTSMUL_z_zz: + ftsmul(vform, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister result; + + switch (instr->Mask(SVEFPCompareVectorsMask)) { + case FACGE_p_p_zz: + fabscmp(vform, result, zn, zm, ge); + break; + case FACGT_p_p_zz: + fabscmp(vform, result, zn, zm, gt); + break; + case FCMEQ_p_p_zz: + fcmp(vform, result, zn, zm, eq); + break; + case FCMGE_p_p_zz: + fcmp(vform, result, zn, zm, ge); + break; + case FCMGT_p_p_zz: + fcmp(vform, result, zn, zm, gt); + break; + case FCMNE_p_p_zz: + fcmp(vform, result, zn, zm, ne); + break; + case FCMUO_p_p_zz: + fcmp(vform, result, zn, zm, uo); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + ExtractFromSimVRegister(vform, pd, result); + mov_zeroing(pd, pg, pd); +} + +void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister result; + + SimVRegister zeros; + dup_immediate(kFormatVnD, zeros, 0); + + switch (instr->Mask(SVEFPCompareWithZeroMask)) { + case FCMEQ_p_p_z0: + fcmp(vform, result, zn, zeros, eq); + break; + case FCMGE_p_p_z0: + fcmp(vform, result, zn, zeros, ge); + break; + case FCMGT_p_p_z0: + fcmp(vform, result, zn, zeros, gt); + break; + case FCMLE_p_p_z0: + 
fcmp(vform, result, zn, zeros, le); + break; + case FCMLT_p_p_z0: + fcmp(vform, result, zn, zeros, lt); + break; + case FCMNE_p_p_z0: + fcmp(vform, result, zn, zeros, ne); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + ExtractFromSimVRegister(vform, pd, result); + mov_zeroing(pd, pg, pd); +} + +void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int rot = instr->ExtractBit(16); + + SimVRegister result; + + switch (instr->Mask(SVEFPComplexAdditionMask)) { + case FCADD_z_p_zz: + fcadd(vform, result, zdn, zm, rot); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + if (LaneSizeInBitsFromFormat(vform) == kBRegSize) { + VIXL_UNIMPLEMENTED(); + } + + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int rot = instr->ExtractBits(14, 13); + + SimVRegister result; + + switch (instr->Mask(SVEFPComplexMulAddMask)) { + case FCMLA_z_p_zzz: + fcmla(vform, result, zn, zm, zda, rot); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zda, pg, result); +} + +void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) { + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + int rot = instr->ExtractBits(11, 10); + unsigned zm_code = instr->GetRm(); + int index = -1; + VectorFormat vform, vform_dup; + + switch (instr->Mask(SVEFPComplexMulAddIndexMask)) { + case FCMLA_z_zzzi_h: + vform = kFormatVnH; + vform_dup = kFormatVnS; + index = zm_code >> 3; + zm_code &= 0x7; + break; + case FCMLA_z_zzzi_s: + vform = kFormatVnS; + vform_dup = kFormatVnD; + index = zm_code >> 4; + zm_code &= 0xf; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (index >= 0) { + SimVRegister temp; + dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index); + fcmla(vform, zda, zn, temp, zda, rot); + } +} + +typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + +void Simulator::VisitSVEFPFastReduction(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int lane_size = LaneSizeInBitsFromFormat(vform); + + uint64_t inactive_value = 0; + FastReduceFn fn = nullptr; + + switch (instr->Mask(SVEFPFastReductionMask)) { + case FADDV_v_p_z: + fn = &Simulator::faddv; + break; + case FMAXNMV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + fn = &Simulator::fmaxnmv; + break; + case FMAXV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); + fn = &Simulator::fmaxv; + break; + case FMINNMV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + fn = &Simulator::fminnmv; + break; + case FMINV_v_p_z: + inactive_value = FPToRawbitsWithSize(lane_size, 
kFP64PositiveInfinity); + fn = &Simulator::fminv; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister scratch; + dup_immediate(vform, scratch, inactive_value); + mov_merging(vform, scratch, pg, zn); + if (fn != nullptr) (this->*fn)(vform, vd, scratch); +} + +void Simulator::VisitSVEFPMulIndex(const Instruction* instr) { + VectorFormat vform = kFormatUndefined; + unsigned zm_code = instr->GetRm() & 0xf; + unsigned index = instr->ExtractBits(20, 19); + + switch (instr->Mask(SVEFPMulIndexMask)) { + case FMUL_z_zzi_d: + vform = kFormatVnD; + index >>= 1; // Only bit 20 is the index for D lanes. + break; + case FMUL_z_zzi_h_i3h: + index += 4; // Bit 22 (i3h) is the top bit of index. + VIXL_FALLTHROUGH(); + case FMUL_z_zzi_h: + vform = kFormatVnH; + zm_code &= 7; // Three bits used for zm. + break; + case FMUL_z_zzi_s: + vform = kFormatVnS; + zm_code &= 7; // Three bits used for zm. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister temp; + + dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); + fmul(vform, zd, zn, temp); +} + +void Simulator::VisitSVEFPMulAdd(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + if (instr->ExtractBit(15) == 0) { + // Floating-point multiply-accumulate writing addend. + SimVRegister& zm = ReadVRegister(instr->GetRm()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEFPMulAddMask)) { + // zda = zda + zn * zm + case FMLA_z_p_zzz: + fmla(vform, result, zd, zn, zm); + break; + // zda = -zda + -zn * zm + case FNMLA_z_p_zzz: + fneg(vform, result, zd); + fmls(vform, result, result, zn, zm); + break; + // zda = zda + -zn * zm + case FMLS_z_p_zzz: + fmls(vform, result, zd, zn, zm); + break; + // zda = -zda + zn * zm + case FNMLS_z_p_zzz: + fneg(vform, result, zd); + fmla(vform, result, result, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + // Floating-point multiply-accumulate writing multiplicand. + SimVRegister& za = ReadVRegister(instr->GetRm()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEFPMulAddMask)) { + // zdn = za + zdn * zm + case FMAD_z_p_zzz: + fmla(vform, result, za, zd, zm); + break; + // zdn = -za + -zdn * zm + case FNMAD_z_p_zzz: + fneg(vform, result, za); + fmls(vform, result, result, zd, zm); + break; + // zdn = za + -zdn * zm + case FMSB_z_p_zzz: + fmls(vform, result, za, zd, zm); + break; + // zdn = -za + zdn * zm + case FNMSB_z_p_zzz: + fneg(vform, result, za); + fmla(vform, result, result, zd, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) { + VectorFormat vform = kFormatUndefined; + unsigned zm_code = 0xffffffff; + unsigned index = 0xffffffff; + + switch (instr->Mask(SVEFPMulAddIndexMask)) { + case FMLA_z_zzzi_d: + case FMLS_z_zzzi_d: + vform = kFormatVnD; + zm_code = instr->GetRmLow16(); + // Only bit 20 is the index for D lanes. + index = instr->ExtractBit(20); + break; + case FMLA_z_zzzi_s: + case FMLS_z_zzzi_s: + vform = kFormatVnS; + zm_code = instr->GetRm() & 0x7; // Three bits used for zm. 
+ index = instr->ExtractBits(20, 19); + break; + case FMLA_z_zzzi_h: + case FMLS_z_zzzi_h: + case FMLA_z_zzzi_h_i3h: + case FMLS_z_zzzi_h_i3h: + vform = kFormatVnH; + zm_code = instr->GetRm() & 0x7; // Three bits used for zm. + index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister temp; + + dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index); + if (instr->ExtractBit(10) == 1) { + fmls(vform, zd, zd, zn, temp); + } else { + fmla(vform, zd, zd, zn, temp); + } +} + +void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int dst_data_size; + int src_data_size; + + switch (instr->Mask(SVEFPConvertToIntMask)) { + case FCVTZS_z_p_z_d2w: + case FCVTZU_z_p_z_d2w: + dst_data_size = kSRegSize; + src_data_size = kDRegSize; + break; + case FCVTZS_z_p_z_d2x: + case FCVTZU_z_p_z_d2x: + dst_data_size = kDRegSize; + src_data_size = kDRegSize; + break; + case FCVTZS_z_p_z_fp162h: + case FCVTZU_z_p_z_fp162h: + dst_data_size = kHRegSize; + src_data_size = kHRegSize; + break; + case FCVTZS_z_p_z_fp162w: + case FCVTZU_z_p_z_fp162w: + dst_data_size = kSRegSize; + src_data_size = kHRegSize; + break; + case FCVTZS_z_p_z_fp162x: + case FCVTZU_z_p_z_fp162x: + dst_data_size = kDRegSize; + src_data_size = kHRegSize; + break; + case FCVTZS_z_p_z_s2w: + case FCVTZU_z_p_z_s2w: + dst_data_size = kSRegSize; + src_data_size = kSRegSize; + break; + case FCVTZS_z_p_z_s2x: + case FCVTZU_z_p_z_s2x: + dst_data_size = kDRegSize; + src_data_size = kSRegSize; + break; + default: + VIXL_UNIMPLEMENTED(); + dst_data_size = 0; + src_data_size = 0; + break; + } + + VectorFormat vform = + SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); + + if (instr->ExtractBit(16) == 0) { + fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero); + } else { + fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero); + } +} + +void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + int dst_data_size; + int src_data_size; + + switch (instr->Mask(SVEFPConvertPrecisionMask)) { + case FCVT_z_p_z_d2h: + dst_data_size = kHRegSize; + src_data_size = kDRegSize; + break; + case FCVT_z_p_z_d2s: + dst_data_size = kSRegSize; + src_data_size = kDRegSize; + break; + case FCVT_z_p_z_h2d: + dst_data_size = kDRegSize; + src_data_size = kHRegSize; + break; + case FCVT_z_p_z_h2s: + dst_data_size = kSRegSize; + src_data_size = kHRegSize; + break; + case FCVT_z_p_z_s2d: + dst_data_size = kDRegSize; + src_data_size = kSRegSize; + break; + case FCVT_z_p_z_s2h: + dst_data_size = kHRegSize; + src_data_size = kSRegSize; + break; + default: + VIXL_UNIMPLEMENTED(); + dst_data_size = 0; + src_data_size = 0; + break; + } + VectorFormat vform = + SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); + + fcvt(vform, dst_data_size, src_data_size, zd, pg, zn); +} + +void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + 
VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister result; + + switch (instr->Mask(SVEFPUnaryOpMask)) { + case FRECPX_z_p_z: + frecpx(vform, result, zn); + break; + case FSQRT_z_p_z: + fsqrt(vform, result, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = instr->GetSVEVectorFormat(); + FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); + bool exact_exception = false; + + switch (instr->Mask(SVEFPRoundToIntegralValueMask)) { + case FRINTA_z_p_z: + fpcr_rounding = FPTieAway; + break; + case FRINTI_z_p_z: + break; // Use FPCR rounding mode. + case FRINTM_z_p_z: + fpcr_rounding = FPNegativeInfinity; + break; + case FRINTN_z_p_z: + fpcr_rounding = FPTieEven; + break; + case FRINTP_z_p_z: + fpcr_rounding = FPPositiveInfinity; + break; + case FRINTX_z_p_z: + exact_exception = true; + break; + case FRINTZ_z_p_z: + fpcr_rounding = FPZero; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister result; + frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger); + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); + int dst_data_size; + int src_data_size; + + switch (instr->Mask(SVEIntConvertToFPMask)) { + case SCVTF_z_p_z_h2fp16: + case UCVTF_z_p_z_h2fp16: + dst_data_size = kHRegSize; + src_data_size = kHRegSize; + break; + case SCVTF_z_p_z_w2d: + case UCVTF_z_p_z_w2d: + dst_data_size = kDRegSize; + src_data_size = kSRegSize; + break; + case SCVTF_z_p_z_w2fp16: + case UCVTF_z_p_z_w2fp16: + dst_data_size = kHRegSize; + src_data_size = kSRegSize; + break; + case SCVTF_z_p_z_w2s: + case UCVTF_z_p_z_w2s: + dst_data_size = kSRegSize; + src_data_size = kSRegSize; + break; + case SCVTF_z_p_z_x2d: + case UCVTF_z_p_z_x2d: + dst_data_size = kDRegSize; + src_data_size = kDRegSize; + break; + case SCVTF_z_p_z_x2fp16: + case UCVTF_z_p_z_x2fp16: + dst_data_size = kHRegSize; + src_data_size = kDRegSize; + break; + case SCVTF_z_p_z_x2s: + case UCVTF_z_p_z_x2s: + dst_data_size = kSRegSize; + src_data_size = kDRegSize; + break; + default: + VIXL_UNIMPLEMENTED(); + dst_data_size = 0; + src_data_size = 0; + break; + } + + VectorFormat vform = + SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size)); + + if (instr->ExtractBit(16) == 0) { + scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding); + } else { + ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding); + } +} + +void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode()); + + switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) { + case FRECPE_z_z: + frecpe(vform, zd, zn, fpcr_rounding); + break; + case FRSQRTE_z_z: + frsqrte(vform, zd, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void 
Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5)); + + int count = CountActiveLanes(vform, pg); + + if (instr->ExtractBit(11) == 0) { + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + case DECP_z_p_z: + sub_uint(vform, zdn, zdn, count); + break; + case INCP_z_p_z: + add_uint(vform, zdn, zdn, count); + break; + case SQDECP_z_p_z: + sub_uint(vform, zdn, zdn, count).SignedSaturate(vform); + break; + case SQINCP_z_p_z: + add_uint(vform, zdn, zdn, count).SignedSaturate(vform); + break; + case UQDECP_z_p_z: + sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform); + break; + case UQINCP_z_p_z: + add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + bool is_saturating = (instr->ExtractBit(18) == 0); + bool decrement = + is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16); + bool is_signed = (instr->ExtractBit(16) == 0); + bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true; + unsigned width = sf ? kXRegSize : kWRegSize; + + switch (instr->Mask(SVEIncDecByPredicateCountMask)) { + case DECP_r_p_r: + case INCP_r_p_r: + case SQDECP_r_p_r_sx: + case SQDECP_r_p_r_x: + case SQINCP_r_p_r_sx: + case SQINCP_r_p_r_x: + case UQDECP_r_p_r_uw: + case UQDECP_r_p_r_x: + case UQINCP_r_p_r_uw: + case UQINCP_r_p_r_x: + WriteXRegister(instr->GetRd(), + IncDecN(ReadXRegister(instr->GetRd()), + decrement ? -count : count, + width, + is_saturating, + is_signed)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } +} + +uint64_t Simulator::IncDecN(uint64_t acc, + int64_t delta, + unsigned n, + bool is_saturating, + bool is_signed) { + VIXL_ASSERT(n <= 64); + VIXL_ASSERT(IsIntN(n, delta)); + + uint64_t sign_mask = UINT64_C(1) << (n - 1); + uint64_t mask = GetUintMask(n); + + acc &= mask; // Ignore initial accumulator high bits. + uint64_t result = (acc + delta) & mask; + + bool result_negative = ((result & sign_mask) != 0); + + if (is_saturating) { + if (is_signed) { + bool acc_negative = ((acc & sign_mask) != 0); + bool delta_negative = delta < 0; + + // If the signs of the operands are the same, but different from the + // result, there was an overflow. + if ((acc_negative == delta_negative) && + (acc_negative != result_negative)) { + if (result_negative) { + // Saturate to [..., INT<n>_MAX]. + result_negative = false; + result = mask & ~sign_mask; // E.g. 0x000000007fffffff + } else { + // Saturate to [INT<n>_MIN, ...]. + result_negative = true; + result = ~mask | sign_mask; // E.g. 0xffffffff80000000 + } + } + } else { + if ((delta < 0) && (result > acc)) { + // Saturate to [0, ...]. + result = 0; + } else if ((delta > 0) && (result < acc)) { + // Saturate to [..., UINT<n>_MAX]. + result = mask; + } + } + } + + // Sign-extend if necessary. + if (result_negative && is_signed) result |= ~mask; + + return result; +} + +void Simulator::VisitSVEIndexGeneration(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVEIndexGenerationMask)) { + case INDEX_z_ii: + case INDEX_z_ir: + case INDEX_z_ri: + case INDEX_z_rr: { + uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn()) + : instr->ExtractSignedBits(9, 5); + uint64_t step = instr->ExtractBit(11) ? 
ReadXRegister(instr->GetRm()) + : instr->ExtractSignedBits(20, 16); + index(vform, zd, start, step); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) { + case ADD_z_zz: + add(vform, zd, zn, zm); + break; + case SQADD_z_zz: + add(vform, zd, zn, zm).SignedSaturate(vform); + break; + case SQSUB_z_zz: + sub(vform, zd, zn, zm).SignedSaturate(vform); + break; + case SUB_z_zz: + sub(vform, zd, zn, zm); + break; + case UQADD_z_zz: + add(vform, zd, zn, zm).UnsignedSaturate(vform); + break; + case UQSUB_z_zz: + sub(vform, zd, zn, zm).UnsignedSaturate(vform); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntAddSubtractVectors_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) { + case ADD_z_p_zz: + add(vform, result, zdn, zm); + break; + case SUBR_z_p_zz: + sub(vform, result, zm, zdn); + break; + case SUB_z_p_zz: + sub(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) { + case AND_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm); + break; + case BIC_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm); + break; + case EOR_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm); + break; + case ORR_z_p_zz: + SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) { + case MUL_z_p_zz: + mul(vform, result, zdn, zm); + break; + case SMULH_z_p_zz: + smulh(vform, result, zdn, zm); + break; + case UMULH_z_p_zz: + umulh(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMinMaxDifference_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) { + case SABD_z_p_zz: + 
absdiff(vform, result, zdn, zm, true); + break; + case SMAX_z_p_zz: + smax(vform, result, zdn, zm); + break; + case SMIN_z_p_zz: + smin(vform, result, zdn, zm); + break; + case UABD_z_p_zz: + absdiff(vform, result, zdn, zm, false); + break; + case UMAX_z_p_zz: + umax(vform, result, zdn, zm); + break; + case UMIN_z_p_zz: + umin(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + + switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) { + case MUL_z_zi: + dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned()); + mul(vform, zd, zd, scratch); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister result; + + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) { + case SDIVR_z_p_zz: + sdiv(vform, result, zm, zdn); + break; + case SDIV_z_p_zz: + sdiv(vform, result, zdn, zm); + break; + case UDIVR_z_p_zz: + udiv(vform, result, zm, zdn); + break; + case UDIV_z_p_zz: + udiv(vform, result, zdn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zdn, pg, result); +} + +void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + + uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned(); + int64_t signed_imm = instr->GetImmSVEIntWideSigned(); + + switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) { + case SMAX_z_zi: + dup_immediate(vform, scratch, signed_imm); + smax(vform, zd, zd, scratch); + break; + case SMIN_z_zi: + dup_immediate(vform, scratch, signed_imm); + smin(vform, zd, zd, scratch); + break; + case UMAX_z_zi: + dup_immediate(vform, scratch, unsigned_imm); + umax(vform, zd, zd, scratch); + break; + case UMIN_z_zi: + dup_immediate(vform, scratch, unsigned_imm); + umin(vform, zd, zd, scratch); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntCompareScalarCountAndLimit( + const Instruction* instr) { + unsigned rn_code = instr->GetRn(); + unsigned rm_code = instr->GetRm(); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + bool is_64_bit = instr->ExtractBit(12) == 1; + int64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code); + int64_t src2 = is_64_bit ? 
ReadXRegister(rm_code) : ReadWRegister(rm_code); + + bool last = true; + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + bool cond = false; + switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) { + case WHILELE_p_p_rr: + cond = src1 <= src2; + break; + case WHILELO_p_p_rr: + cond = static_cast<uint64_t>(src1) < static_cast<uint64_t>(src2); + break; + case WHILELS_p_p_rr: + cond = static_cast<uint64_t>(src1) <= static_cast<uint64_t>(src2); + break; + case WHILELT_p_p_rr: + cond = src1 < src2; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + last = last && cond; + LogicPRegister dst(pd); + dst.SetActive(vform, lane, last); + src1 += 1; + } + + PredTest(vform, GetPTrue(), pd); + LogSystemRegister(NZCV); +} + +void Simulator::VisitSVEConditionallyTerminateScalars( + const Instruction* instr) { + unsigned rn_code = instr->GetRn(); + unsigned rm_code = instr->GetRm(); + bool is_64_bit = instr->ExtractBit(22) == 1; + uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code); + uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code); + bool term; + switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) { + case CTERMEQ_rr: + term = src1 == src2; + break; + case CTERMNE_rr: + term = src1 != src2; + break; + default: + term = false; + VIXL_UNIMPLEMENTED(); + break; + } + ReadNzcv().SetN(term ? 1 : 0); + ReadNzcv().SetV(term ? 0 : !ReadC()); + LogSystemRegister(NZCV); +} + +void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) { + bool commute_inputs = false; + Condition cond; + switch (instr->Mask(SVEIntCompareSignedImmMask)) { + case CMPEQ_p_p_zi: + cond = eq; + break; + case CMPGE_p_p_zi: + cond = ge; + break; + case CMPGT_p_p_zi: + cond = gt; + break; + case CMPLE_p_p_zi: + cond = ge; + commute_inputs = true; + break; + case CMPLT_p_p_zi: + cond = gt; + commute_inputs = true; + break; + case CMPNE_p_p_zi: + cond = ne; + break; + default: + cond = al; + VIXL_UNIMPLEMENTED(); + break; + } + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister src2; + dup_immediate(vform, + src2, + ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16))); + SVEIntCompareVectorsHelper(cond, + vform, + ReadPRegister(instr->GetPd()), + ReadPRegister(instr->GetPgLow8()), + commute_inputs ? src2 + : ReadVRegister(instr->GetRn()), + commute_inputs ? ReadVRegister(instr->GetRn()) + : src2); +} + +void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) { + bool commute_inputs = false; + Condition cond; + switch (instr->Mask(SVEIntCompareUnsignedImmMask)) { + case CMPHI_p_p_zi: + cond = hi; + break; + case CMPHS_p_p_zi: + cond = hs; + break; + case CMPLO_p_p_zi: + cond = hi; + commute_inputs = true; + break; + case CMPLS_p_p_zi: + cond = hs; + commute_inputs = true; + break; + default: + cond = al; + VIXL_UNIMPLEMENTED(); + break; + } + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister src2; + dup_immediate(vform, src2, instr->ExtractBits(20, 14)); + SVEIntCompareVectorsHelper(cond, + vform, + ReadPRegister(instr->GetPd()), + ReadPRegister(instr->GetPgLow8()), + commute_inputs ? src2 + : ReadVRegister(instr->GetRn()), + commute_inputs ? 
ReadVRegister(instr->GetRn()) + : src2); +} + +void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) { + Instr op = instr->Mask(SVEIntCompareVectorsMask); + bool is_wide_elements = false; + switch (op) { + case CMPEQ_p_p_zw: + case CMPGE_p_p_zw: + case CMPGT_p_p_zw: + case CMPHI_p_p_zw: + case CMPHS_p_p_zw: + case CMPLE_p_p_zw: + case CMPLO_p_p_zw: + case CMPLS_p_p_zw: + case CMPLT_p_p_zw: + case CMPNE_p_p_zw: + is_wide_elements = true; + break; + } + + Condition cond; + switch (op) { + case CMPEQ_p_p_zw: + case CMPEQ_p_p_zz: + cond = eq; + break; + case CMPGE_p_p_zw: + case CMPGE_p_p_zz: + cond = ge; + break; + case CMPGT_p_p_zw: + case CMPGT_p_p_zz: + cond = gt; + break; + case CMPHI_p_p_zw: + case CMPHI_p_p_zz: + cond = hi; + break; + case CMPHS_p_p_zw: + case CMPHS_p_p_zz: + cond = hs; + break; + case CMPNE_p_p_zw: + case CMPNE_p_p_zz: + cond = ne; + break; + case CMPLE_p_p_zw: + cond = le; + break; + case CMPLO_p_p_zw: + cond = lo; + break; + case CMPLS_p_p_zw: + cond = ls; + break; + case CMPLT_p_p_zw: + cond = lt; + break; + default: + VIXL_UNIMPLEMENTED(); + cond = al; + break; + } + + SVEIntCompareVectorsHelper(cond, + instr->GetSVEVectorFormat(), + ReadPRegister(instr->GetPd()), + ReadPRegister(instr->GetPgLow8()), + ReadVRegister(instr->GetRn()), + ReadVRegister(instr->GetRm()), + is_wide_elements); +} + +void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) || + (vform == kFormatVnD)); + + switch (instr->Mask(SVEFPExponentialAcceleratorMask)) { + case FEXPA_z_z: + fexpa(vform, zd, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) || + (vform == kFormatVnD)); + + switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) { + case FTSSEL_z_zz: + ftssel(vform, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEConstructivePrefix_Unpredicated( + const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) { + case MOVPRFX_z_z: + mov(kFormatVnD, zd, zn); // The lane size is arbitrary. + // Record the movprfx, so the next ExecuteInstruction() can check it. + movprfx_ = instr; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + SimVRegister result; + switch (instr->Mask(SVEIntMulAddPredicatedMask)) { + case MLA_z_p_zzz: + mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm); + break; + case MLS_z_p_zzz: + mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm); + break; + case MAD_z_p_zzz: + // 'za' is encoded in 'Rn'. + mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm); + break; + case MSB_z_p_zzz: { + // 'za' is encoded in 'Rn'. 
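+ // MSB computes zdn = za - zdn * zm, so relative to MLS (zda = zda -
+ // zn * zm) the accumulator and the multiplicand swap places in the
+ // mls() call below.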
+ mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result); +} + +void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) { + case SDOT_z_zzz: + sdot(vform, zda, zn, zm); + break; + case UDOT_z_zzz: + udot(vform, zda, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEMovprfx(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + switch (instr->Mask(SVEMovprfxMask)) { + case MOVPRFX_z_p_z: + if (instr->ExtractBit(16)) { + mov_merging(vform, zd, pg, zn); + } else { + mov_zeroing(vform, zd, pg, zn); + } + + // Record the movprfx, so the next ExecuteInstruction() can check it. + movprfx_ = instr; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEIntReduction(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) { + switch (instr->Mask(SVEIntReductionLogicalMask)) { + case ANDV_r_p_z: + andv(vform, vd, pg, zn); + break; + case EORV_r_p_z: + eorv(vform, vd, pg, zn); + break; + case ORV_r_p_z: + orv(vform, vd, pg, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + switch (instr->Mask(SVEIntReductionMask)) { + case SADDV_r_p_z: + saddv(vform, vd, pg, zn); + break; + case SMAXV_r_p_z: + smaxv(vform, vd, pg, zn); + break; + case SMINV_r_p_z: + sminv(vform, vd, pg, zn); + break; + case UADDV_r_p_z: + uaddv(vform, vd, pg, zn); + break; + case UMAXV_r_p_z: + umaxv(vform, vd, pg, zn); + break; + case UMINV_r_p_z: + uminv(vform, vd, pg, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } +} + +void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + SimVRegister result; + switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) { + case ABS_z_p_z: + abs(vform, result, zn); + break; + case CLS_z_p_z: + cls(vform, result, zn); + break; + case CLZ_z_p_z: + clz(vform, result, zn); + break; + case CNOT_z_p_z: + cnot(vform, result, zn); + break; + case CNT_z_p_z: + cnt(vform, result, zn); + break; + case FABS_z_p_z: + fabs_(vform, result, zn); + break; + case FNEG_z_p_z: + fneg(vform, result, zn); + break; + case NEG_z_p_z: + neg(vform, result, zn); + break; + case NOT_z_p_z: + not_(vform, result, zn); + break; + case SXTB_z_p_z: + case SXTH_z_p_z: + case SXTW_z_p_z: + sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17))); + break; + case UXTB_z_p_z: + case UXTH_z_p_z: + case UXTW_z_p_z: + uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17))); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimPRegister& pg = 
ReadPRegister(instr->GetPgLow8()); + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) { + // There is only one instruction in this group. + VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i); + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16)); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + SimVRegister result; + switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: { + int imm8 = instr->ExtractBits(12, 5); + uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), + Instruction::Imm8ToFP64(imm8)); + dup_immediate(vform, result, value); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + mov_merging(vform, zd, pg, result); +} + +void Simulator::VisitSVEIntAddSubtractImm_Unpredicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister scratch; + + uint64_t imm = instr->GetImmSVEIntWideUnsigned(); + imm <<= instr->ExtractBit(13) * 8; + + switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) { + case ADD_z_zi: + add_uint(vform, zd, zd, imm); + break; + case SQADD_z_zi: + add_uint(vform, zd, zd, imm).SignedSaturate(vform); + break; + case SQSUB_z_zi: + sub_uint(vform, zd, zd, imm).SignedSaturate(vform); + break; + case SUBR_z_zi: + dup_immediate(vform, scratch, imm); + sub(vform, zd, scratch, zd); + break; + case SUB_z_zi: + sub_uint(vform, zd, zd, imm); + break; + case UQADD_z_zi: + add_uint(vform, zd, zd, imm).UnsignedSaturate(vform); + break; + case UQSUB_z_zi: + sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform); + break; + default: + break; + } +} + +void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + VectorFormat format = instr->GetSVEVectorFormat(); + int64_t imm = instr->GetImmSVEIntWideSigned(); + int shift = instr->ExtractBit(13) * 8; + imm *= 1 << shift; + + switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) { + case DUP_z_i: + // The encoding of byte-sized lanes with lsl #8 is undefined. + if ((format == kFormatVnB) && (shift == 8)) { + VIXL_UNIMPLEMENTED(); + } else { + dup_immediate(format, zd, imm); + } + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { + case FDUP_z_i: + switch (vform) { + case kFormatVnH: + dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16())); + break; + case kFormatVnS: + dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32())); + break; + case kFormatVnD: + dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64())); + break; + default: + VIXL_UNIMPLEMENTED(); + } + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1H_z_p_bz_s_x32_scaled: + case LD1SH_z_p_bz_s_x32_scaled: + case LDFF1H_z_p_bz_s_x32_scaled: + case LDFF1SH_z_p_bz_s_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? 
SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); +} + +void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_s_x32_unscaled: + case LD1H_z_p_bz_s_x32_unscaled: + case LD1SB_z_p_bz_s_x32_unscaled: + case LD1SH_z_p_bz_s_x32_unscaled: + case LD1W_z_p_bz_s_x32_unscaled: + case LDFF1B_z_p_bz_s_x32_unscaled: + case LDFF1H_z_p_bz_s_x32_unscaled: + case LDFF1SB_z_p_bz_s_x32_unscaled: + case LDFF1SH_z_p_bz_s_x32_unscaled: + case LDFF1W_z_p_bz_s_x32_unscaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); +} + +void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1H_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1SB_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1SH_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LD1W_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1B_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1H_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1SB_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1SH_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + case LDFF1W_z_p_ai_s: + VIXL_UNIMPLEMENTED(); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) { + case LD1W_z_p_bz_s_x32_scaled: + case LDFF1W_z_p_bz_s_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod); +} + +void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bz_s_x32_scaled: + case PRFD_i_p_bz_s_x32_scaled: + case PRFH_i_p_bz_s_x32_scaled: + case PRFW_i_p_bz_s_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_ai_s: + case PRFD_i_p_ai_s: + case PRFH_i_p_ai_s: + case PRFW_i_p_ai_s: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bi_s: + case PRFD_i_p_bi_s: + case PRFH_i_p_bi_s: + case PRFW_i_p_bi_s: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) { + // Ignore prefetch hint instructions. 
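+ // The simulator models no caches, so once the encoding (including the
+ // base and offset registers) has been checked, the prefetch is a no-op.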
+ case PRFB_i_p_br_s: + case PRFD_i_p_br_s: + case PRFH_i_p_br_s: + case PRFW_i_p_br_s: + if (instr->GetRm() == kZeroRegCode) { + VIXL_UNIMPLEMENTED(); + } + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVELoadAndBroadcastElementMask)) { + case LD1RB_z_p_bi_u8: + case LD1RB_z_p_bi_u16: + case LD1RB_z_p_bi_u32: + case LD1RB_z_p_bi_u64: + case LD1RH_z_p_bi_u16: + case LD1RH_z_p_bi_u32: + case LD1RH_z_p_bi_u64: + case LD1RW_z_p_bi_u32: + case LD1RW_z_p_bi_u64: + case LD1RD_z_p_bi_u64: + is_signed = false; + break; + case LD1RSB_z_p_bi_s16: + case LD1RSB_z_p_bi_s32: + case LD1RSB_z_p_bi_s64: + case LD1RSH_z_p_bi_s32: + case LD1RSH_z_p_bi_s64: + case LD1RSW_z_p_bi_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. + VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()) + offset; + VectorFormat unpack_vform = + SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); + SimVRegister temp; + ld1r(vform, unpack_vform, temp, base, is_signed); + mov_zeroing(vform, + ReadVRegister(instr->GetRt()), + ReadPRegister(instr->GetPgLow8()), + temp); +} + +void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) { + switch (instr->Mask(SVELoadPredicateRegisterMask)) { + case LDR_p_bi: { + SimPRegister& pt = ReadPRegister(instr->GetPt()); + int pl = GetPredicateLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl; + for (int i = 0; i < pl; i++) { + pt.Insert(i, Memory::Read<uint8_t>(address + i)); + } + LogPRead(instr->GetPt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) { + switch (instr->Mask(SVELoadVectorRegisterMask)) { + case LDR_z_bi: { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + int vl = GetVectorLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl; + for (int i = 0; i < vl; i++) { + zt.Insert(i, Memory::Read<uint8_t>(address + i)); + } + LogZRead(instr->GetRt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) { + case LD1D_z_p_bz_d_x32_scaled: + case LD1H_z_p_bz_d_x32_scaled: + case LD1SH_z_p_bz_d_x32_scaled: + case LD1SW_z_p_bz_d_x32_scaled: + case LD1W_z_p_bz_d_x32_scaled: + case LDFF1H_z_p_bz_d_x32_scaled: + case LDFF1W_z_p_bz_d_x32_scaled: + case LDFF1D_z_p_bz_d_x32_scaled: + case LDFF1SH_z_p_bz_d_x32_scaled: + case LDFF1SW_z_p_bz_d_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = 
(instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod); +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) { + case LD1D_z_p_bz_d_64_scaled: + case LD1H_z_p_bz_d_64_scaled: + case LD1SH_z_p_bz_d_64_scaled: + case LD1SW_z_p_bz_d_64_scaled: + case LD1W_z_p_bz_d_64_scaled: + case LDFF1H_z_p_bz_d_64_scaled: + case LDFF1W_z_p_bz_d_64_scaled: + case LDFF1D_z_p_bz_d_64_scaled: + case LDFF1SH_z_p_bz_d_64_scaled: + case LDFF1SW_z_p_bz_d_64_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL); +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_64_unscaled: + case LD1D_z_p_bz_d_64_unscaled: + case LD1H_z_p_bz_d_64_unscaled: + case LD1SB_z_p_bz_d_64_unscaled: + case LD1SH_z_p_bz_d_64_unscaled: + case LD1SW_z_p_bz_d_64_unscaled: + case LD1W_z_p_bz_d_64_unscaled: + case LDFF1B_z_p_bz_d_64_unscaled: + case LDFF1D_z_p_bz_d_64_unscaled: + case LDFF1H_z_p_bz_d_64_unscaled: + case LDFF1SB_z_p_bz_d_64_unscaled: + case LDFF1SH_z_p_bz_d_64_unscaled: + case LDFF1SW_z_p_bz_d_64_unscaled: + case LDFF1W_z_p_bz_d_64_unscaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEGatherLoadScalarPlusVectorHelper(instr, + kFormatVnD, + NO_SVE_OFFSET_MODIFIER); +} + +void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case LD1B_z_p_bz_d_x32_unscaled: + case LD1D_z_p_bz_d_x32_unscaled: + case LD1H_z_p_bz_d_x32_unscaled: + case LD1SB_z_p_bz_d_x32_unscaled: + case LD1SH_z_p_bz_d_x32_unscaled: + case LD1SW_z_p_bz_d_x32_unscaled: + case LD1W_z_p_bz_d_x32_unscaled: + case LDFF1B_z_p_bz_d_x32_unscaled: + case LDFF1H_z_p_bz_d_x32_unscaled: + case LDFF1W_z_p_bz_d_x32_unscaled: + case LDFF1D_z_p_bz_d_x32_unscaled: + case LDFF1SB_z_p_bz_d_x32_unscaled: + case LDFF1SH_z_p_bz_d_x32_unscaled: + case LDFF1SW_z_p_bz_d_x32_unscaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW; + SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod); +} + +void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) { + case LD1B_z_p_ai_d: + case LD1D_z_p_ai_d: + case LD1H_z_p_ai_d: + case LD1SB_z_p_ai_d: + case LD1SH_z_p_ai_d: + case LD1SW_z_p_ai_d: + case LD1W_z_p_ai_d: + case LDFF1B_z_p_ai_d: + case LDFF1D_z_p_ai_d: + case LDFF1H_z_p_ai_d: + case LDFF1SB_z_p_ai_d: + case LDFF1SH_z_p_ai_d: + case LDFF1SW_z_p_ai_d: + case LDFF1W_z_p_ai_d: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + bool is_signed = instr->ExtractBit(14) == 0; + bool is_ff = instr->ExtractBit(13) == 1; + // Note that these instructions don't use the Dtype encoding. 
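+ // Instead, the access size comes directly from bits 24:23, and the
+ // five-bit immediate in bits 20:16 is scaled by that size below.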
+ int msize_in_bytes_log2 = instr->ExtractBits(24, 23); + uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2; + LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + if (is_ff) { + VIXL_UNIMPLEMENTED(); + } else { + SVEStructuredLoadHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); + } +} + +void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bz_d_64_scaled: + case PRFD_i_p_bz_d_64_scaled: + case PRFH_i_p_bz_d_64_scaled: + case PRFW_i_p_bz_d_64_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator:: + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_bz_d_x32_scaled: + case PRFD_i_p_bz_d_x32_scaled: + case PRFH_i_p_bz_d_x32_scaled: + case PRFW_i_p_bz_d_x32_scaled: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) { + // Ignore prefetch hint instructions. + case PRFB_i_p_ai_d: + case PRFD_i_p_ai_d: + case PRFH_i_p_ai_d: + case PRFW_i_p_ai_d: + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( + const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { + case LDFF1B_z_p_br_u8: + case LDFF1B_z_p_br_u16: + case LDFF1B_z_p_br_u32: + case LDFF1B_z_p_br_u64: + case LDFF1H_z_p_br_u16: + case LDFF1H_z_p_br_u32: + case LDFF1H_z_p_br_u64: + case LDFF1W_z_p_br_u32: + case LDFF1W_z_p_br_u64: + case LDFF1D_z_p_br_u64: + is_signed = false; + break; + case LDFF1SB_z_p_br_s16: + case LDFF1SB_z_p_br_s32: + case LDFF1SB_z_p_br_s64: + case LDFF1SH_z_p_br_s32: + case LDFF1SH_z_p_br_s64: + case LDFF1SW_z_p_br_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. 
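+ // Hence VIXL_UNREACHABLE() rather than VIXL_UNIMPLEMENTED(): reaching
+ // this default would indicate a decoder bug, not a missing feature.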
+ VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + uint64_t offset = ReadXRegister(instr->GetRm()); + offset <<= msize_in_bytes_log2; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEFaultTolerantLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + kSVEFirstFaultLoad, + is_signed); +} + +void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( + const Instruction* instr) { + bool is_signed = false; + switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) { + case LDNF1B_z_p_bi_u16: + case LDNF1B_z_p_bi_u32: + case LDNF1B_z_p_bi_u64: + case LDNF1B_z_p_bi_u8: + case LDNF1D_z_p_bi_u64: + case LDNF1H_z_p_bi_u16: + case LDNF1H_z_p_bi_u32: + case LDNF1H_z_p_bi_u64: + case LDNF1W_z_p_bi_u32: + case LDNF1W_z_p_bi_u64: + break; + case LDNF1SB_z_p_bi_s16: + case LDNF1SB_z_p_bi_s32: + case LDNF1SB_z_p_bi_s64: + case LDNF1SH_z_p_bi_s32: + case LDNF1SH_z_p_bi_s64: + case LDNF1SW_z_p_bi_s64: + is_signed = true; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + int vl = GetVectorLengthInBytes(); + int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; + uint64_t offset = + (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEFaultTolerantLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + kSVENonFaultLoad, + is_signed); +} + +void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) { + case LDNT1B_z_p_bi_contiguous: + vform = kFormatVnB; + break; + case LDNT1D_z_p_bi_contiguous: + vform = kFormatVnD; + break; + case LDNT1H_z_p_bi_contiguous: + vform = kFormatVnH; + break; + case LDNT1W_z_p_bi_contiguous: + vform = kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int vl = GetVectorLengthInBytes(); + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + pg, + instr->GetRt(), + addr, + /* is_signed = */ false); +} + +void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) { + case LDNT1B_z_p_br_contiguous: + vform = kFormatVnB; + break; + case LDNT1D_z_p_br_contiguous: + vform = kFormatVnD; + break; + case LDNT1H_z_p_br_contiguous: + vform = kFormatVnH; + break; + case LDNT1W_z_p_br_contiguous: + vform = 
kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + pg, + instr->GetRt(), + addr, + /* is_signed = */ false); +} + +void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm( + const Instruction* instr) { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); + uint64_t offset = instr->ExtractSignedBits(19, 16) * 16; + + VectorFormat vform = kFormatUndefined; + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) { + case LD1RQB_z_p_bi_u8: + vform = kFormatVnB; + break; + case LD1RQD_z_p_bi_u64: + vform = kFormatVnD; + break; + case LD1RQH_z_p_bi_u16: + vform = kFormatVnH; + break; + case LD1RQW_z_p_bi_u32: + vform = kFormatVnS; + break; + default: + addr = offset = 0; + break; + } + ld1(kFormat16B, zt, addr + offset); + mov_zeroing(vform, zt, pg, zt); + dup_element(kFormatVnQ, zt, zt, 0); +} + +void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar( + const Instruction* instr) { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); + uint64_t offset = ReadXRegister(instr->GetRm()); + + VectorFormat vform = kFormatUndefined; + switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) { + case LD1RQB_z_p_br_contiguous: + vform = kFormatVnB; + break; + case LD1RQD_z_p_br_contiguous: + vform = kFormatVnD; + offset <<= 3; + break; + case LD1RQH_z_p_br_contiguous: + vform = kFormatVnH; + offset <<= 1; + break; + case LD1RQW_z_p_br_contiguous: + vform = kFormatVnS; + offset <<= 2; + break; + default: + addr = offset = 0; + break; + } + ld1(kFormat16B, zt, addr + offset); + mov_zeroing(vform, zt, pg, zt); + dup_element(kFormatVnQ, zt, zt, 0); +} + +void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) { + case LD2B_z_p_bi_contiguous: + case LD2D_z_p_bi_contiguous: + case LD2H_z_p_bi_contiguous: + case LD2W_z_p_bi_contiguous: + case LD3B_z_p_bi_contiguous: + case LD3D_z_p_bi_contiguous: + case LD3H_z_p_bi_contiguous: + case LD3W_z_p_bi_contiguous: + case LD4B_z_p_bi_contiguous: + case LD4D_z_p_bi_contiguous: + case LD4H_z_p_bi_contiguous: + case LD4W_z_p_bi_contiguous: { + int vl = GetVectorLengthInBytes(); + int msz = instr->ExtractBits(24, 23); + int reg_count = instr->ExtractBits(22, 21) + 1; + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count; + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(reg_count); + SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz), + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar( + const Instruction* instr) { + switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) { + case LD2B_z_p_br_contiguous: + case LD2D_z_p_br_contiguous: + case LD2H_z_p_br_contiguous: + case LD2W_z_p_br_contiguous: 
+ case LD3B_z_p_br_contiguous: + case LD3D_z_p_br_contiguous: + case LD3H_z_p_br_contiguous: + case LD3W_z_p_br_contiguous: + case LD4B_z_p_br_contiguous: + case LD4D_z_p_br_contiguous: + case LD4H_z_p_br_contiguous: + case LD4W_z_p_br_contiguous: { + int msz = instr->ExtractBits(24, 23); + uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(instr->ExtractBits(22, 21) + 1); + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + false); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) { + case ST1H_z_p_bz_s_x32_scaled: + case ST1W_z_p_bz_s_x32_scaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnS, + mod, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnS, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_s_x32_unscaled: + case ST1H_z_p_bz_s_x32_unscaled: + case ST1W_z_p_bz_s_x32_unscaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? 
SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnS, + mod); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnS, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE32BitScatterStore_VectorPlusImm( + const Instruction* instr) { + int msz = 0; + switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_s: + msz = 0; + break; + case ST1H_z_p_ai_s: + msz = 1; + break; + case ST1W_z_p_ai_s: + msz = 2; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + uint64_t imm = instr->ExtractBits(20, 16) << msz; + LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS); + addr.SetMsizeInBytesLog2(msz); + SVEStructuredStoreHelper(kFormatVnS, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); +} + +void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_64_scaled: + case ST1H_z_p_bz_d_64_scaled: + case ST1W_z_p_bz_d_64_scaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + SVE_LSL, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( + const Instruction* instr) { + switch ( + instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_64_unscaled: + case ST1D_z_p_bz_d_64_unscaled: + case ST1H_z_p_bz_d_64_unscaled: + case ST1W_z_p_bz_d_64_unscaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + uint64_t base = ReadXRegister(instr->GetRn()); + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + NO_SVE_OFFSET_MODIFIER); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) { + case ST1D_z_p_bz_d_x32_scaled: + case ST1H_z_p_bz_d_x32_scaled: + case ST1W_z_p_bz_d_x32_scaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? 
SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + mod, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction* instr) { + switch (instr->Mask( + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) { + case ST1B_z_p_bz_d_x32_unscaled: + case ST1D_z_p_bz_d_x32_unscaled: + case ST1H_z_p_bz_d_x32_unscaled: + case ST1W_z_p_bz_d_x32_unscaled: { + unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2); + uint64_t base = ReadXRegister(instr->GetRn()); + SVEOffsetModifier mod = + (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW; + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + kFormatVnD, + mod); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVE64BitScatterStore_VectorPlusImm( + const Instruction* instr) { + int msz = 0; + switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_d: + msz = 0; + break; + case ST1D_z_p_ai_d: + msz = 3; + break; + case ST1H_z_p_ai_d: + msz = 1; + break; + case ST1W_z_p_ai_d: + msz = 2; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + uint64_t imm = instr->ExtractBits(20, 16) << msz; + LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD); + addr.SetMsizeInBytesLog2(msz); + SVEStructuredStoreHelper(kFormatVnD, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); +} + +void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) { + case STNT1B_z_p_bi_contiguous: + vform = kFormatVnB; + break; + case STNT1D_z_p_bi_contiguous: + vform = kFormatVnD; + break; + case STNT1H_z_p_bi_contiguous: + vform = kFormatVnH; + break; + case STNT1W_z_p_bi_contiguous: + vform = kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int vl = GetVectorLengthInBytes(); + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr); +} + +void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( + const Instruction* instr) { + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + VectorFormat vform = kFormatUndefined; + + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) { + case STNT1B_z_p_br_contiguous: + vform = kFormatVnB; + break; + case STNT1D_z_p_br_contiguous: + vform = kFormatVnD; + break; + case STNT1H_z_p_br_contiguous: + vform = kFormatVnH; + break; + case STNT1W_z_p_br_contiguous: + vform = kFormatVnS; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2; 
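+ // With no cache model in the simulator, the non-temporal hint is
+ // ignored and STNT1 behaves like an ordinary contiguous store.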
+ LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr); +} + +void Simulator::VisitSVEContiguousStore_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) { + case ST1B_z_p_bi: + case ST1D_z_p_bi: + case ST1H_z_p_bi: + case ST1W_z_p_bi: { + int vl = GetVectorLengthInBytes(); + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false); + VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); + int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; + uint64_t offset = + (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); + VectorFormat vform = + SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredStoreHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousStore_ScalarPlusScalar( + const Instruction* instr) { + switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) { + case ST1B_z_p_br: + case ST1D_z_p_br: + case ST1H_z_p_br: + case ST1W_z_p_br: { + uint64_t offset = ReadXRegister(instr->GetRm()); + offset <<= instr->ExtractBits(24, 23); + VectorFormat vform = + SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21)); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23)); + SVEStructuredStoreHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister z_result; + + switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { + case CPY_z_p_v: + dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0); + mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm( + const Instruction* instr) { + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) { + case ST2B_z_p_bi_contiguous: + case ST2D_z_p_bi_contiguous: + case ST2H_z_p_bi_contiguous: + case ST2W_z_p_bi_contiguous: + case ST3B_z_p_bi_contiguous: + case ST3D_z_p_bi_contiguous: + case ST3H_z_p_bi_contiguous: + case ST3W_z_p_bi_contiguous: + case ST4B_z_p_bi_contiguous: + case ST4D_z_p_bi_contiguous: + case ST4H_z_p_bi_contiguous: + case ST4W_z_p_bi_contiguous: { + int vl = GetVectorLengthInBytes(); + int msz = instr->ExtractBits(24, 23); + int reg_count = instr->ExtractBits(22, 21) + 1; + uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count; + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(reg_count); + SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz), + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar( + const 
Instruction* instr) { + switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) { + case ST2B_z_p_br_contiguous: + case ST2D_z_p_br_contiguous: + case ST2H_z_p_br_contiguous: + case ST2W_z_p_br_contiguous: + case ST3B_z_p_br_contiguous: + case ST3D_z_p_br_contiguous: + case ST3H_z_p_br_contiguous: + case ST3W_z_p_br_contiguous: + case ST4B_z_p_br_contiguous: + case ST4D_z_p_br_contiguous: + case ST4H_z_p_br_contiguous: + case ST4W_z_p_br_contiguous: { + int msz = instr->ExtractBits(24, 23); + uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz); + LogicSVEAddressVector addr( + ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset); + addr.SetMsizeInBytesLog2(msz); + addr.SetRegCount(instr->ExtractBits(22, 21) + 1); + SVEStructuredStoreHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) { + switch (instr->Mask(SVEStorePredicateRegisterMask)) { + case STR_p_bi: { + SimPRegister& pt = ReadPRegister(instr->GetPt()); + int pl = GetPredicateLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * pl; + for (int i = 0; i < pl; i++) { + Memory::Write(address + i, pt.GetLane<uint8_t>(i)); + } + LogPWrite(instr->GetPt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) { + switch (instr->Mask(SVEStoreVectorRegisterMask)) { + case STR_z_bi: { + SimVRegister& zt = ReadVRegister(instr->GetRt()); + int vl = GetVectorLengthInBytes(); + int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10); + uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9); + uint64_t address = ReadXRegister(instr->GetRn()) + multiplier * vl; + for (int i = 0; i < vl; i++) { + Memory::Write(address + i, zt.GetLane<uint8_t>(i)); + } + LogZWrite(instr->GetRt(), address); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEMulIndex(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zda = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + + switch (instr->Mask(SVEMulIndexMask)) { + case SDOT_z_zzzi_d: + sdot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(19, 16)), + instr->ExtractBit(20)); + break; + case SDOT_z_zzzi_s: + sdot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(18, 16)), + instr->ExtractBits(20, 19)); + break; + case UDOT_z_zzzi_d: + udot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(19, 16)), + instr->ExtractBit(20)); + break; + case UDOT_z_zzzi_s: + udot(vform, + zda, + zn, + ReadVRegister(instr->ExtractBits(18, 16)), + instr->ExtractBits(20, 19)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister result; + + switch (instr->Mask(SVEPartitionBreakConditionMask)) { + case BRKAS_p_p_p_z: + case BRKA_p_p_p: + brka(result, pg, pn); + break; + case BRKBS_p_p_p_z: + case BRKB_p_p_p: + 
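// BRKA sets result elements up to and including the first active true
+ // element of pn, while BRKB stops just before it, so for BRKB the
+ // matching element itself becomes false.
+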
brkb(result, pg, pn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (instr->ExtractBit(4) == 1) { + mov_merging(pd, pg, result); + } else { + mov_zeroing(pd, pg, result); + } + + // Set flag if needed. + if (instr->ExtractBit(22) == 1) { + PredTest(kFormatVnB, pg, pd); + } +} + +void Simulator::VisitSVEPropagateBreakToNextPartition( + const Instruction* instr) { + SimPRegister& pdm = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + + switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) { + case BRKNS_p_p_pp: + case BRKN_p_p_pp: + brkn(pdm, pg, pn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + // Set flag if needed. + if (instr->ExtractBit(22) == 1) { + // Note that this ignores `pg`. + PredTest(kFormatVnB, GetPTrue(), pdm); + } +} + +void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + + SimVRegister temp = Simulator::ExpandToSimVRegister(pn); + SimVRegister zero; + dup_immediate(kFormatVnB, zero, 0); + + switch (instr->Mask(SVEUnpackPredicateElementsMask)) { + case PUNPKHI_p_p: + zip2(kFormatVnB, temp, temp, zero); + break; + case PUNPKLO_p_p: + zip1(kFormatVnB, temp, temp, zero); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp); +} + +void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pm = ReadPRegister(instr->GetPm()); + + SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn); + SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm); + + switch (instr->Mask(SVEPermutePredicateElementsMask)) { + case TRN1_p_pp: + trn1(vform, temp0, temp0, temp1); + break; + case TRN2_p_pp: + trn2(vform, temp0, temp0, temp1); + break; + case UZP1_p_pp: + uzp1(vform, temp0, temp0, temp1); + break; + case UZP2_p_pp: + uzp2(vform, temp0, temp0, temp1); + break; + case ZIP1_p_pp: + zip1(vform, temp0, temp0, temp1); + break; + case ZIP2_p_pp: + zip2(vform, temp0, temp0, temp1); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0); +} + +void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) { + switch (instr->Mask(SVEReversePredicateElementsMask)) { + case REV_p_p: { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimVRegister temp = Simulator::ExpandToSimVRegister(pn); + rev(vform, temp, temp); + Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) { + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + // Second source register "Zm" is encoded where "Zn" would usually be. + SimVRegister& zm = ReadVRegister(instr->GetRn()); + + const int imm8h_mask = 0x001F0000; + const int imm8l_mask = 0x00001C00; + int index = instr->ExtractBits<imm8h_mask | imm8l_mask>(); + int vl = GetVectorLengthInBytes(); + index = (index >= vl) ? 
0 : index; + + switch (instr->Mask(SVEPermuteVectorExtractMask)) { + case EXT_z_zi_des: + ext(kFormatVnB, zdn, zdn, zm, index); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + switch (instr->Mask(SVEPermuteVectorInterleavingMask)) { + case TRN1_z_zz: + trn1(vform, zd, zn, zm); + break; + case TRN2_z_zz: + trn2(vform, zd, zn, zm); + break; + case UZP1_z_zz: + uzp1(vform, zd, zn, zm); + break; + case UZP2_z_zz: + uzp2(vform, zd, zn, zm); + break; + case ZIP1_z_zz: + zip1(vform, zd, zn, zm); + break; + case ZIP2_z_zz: + zip2(vform, zd, zn, zm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEConditionallyBroadcastElementToVector( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) { + case CLASTA_z_p_zz: + active_offset = 1; + break; + case CLASTB_z_p_zz: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + if (value.first) { + dup_immediate(vform, zdn, value.second); + } else { + // Trigger a line of trace for the operation, even though it doesn't + // change the register value. + mov(vform, zdn, zdn); + } + } +} + +void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) { + case CLASTA_v_p_z: + active_offset = 1; + break; + case CLASTB_v_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + LogicVRegister dst(vdn); + uint64_t src1_value = dst.Uint(vform, 0); + std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset); + dup_immediate(vform, vdn, 0); + dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value); + } +} + +void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) { + case CLASTA_r_p_z: + active_offset = 1; + break; + case CLASTB_r_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + uint64_t masked_src = ReadXRegister(instr->GetRd()) & + GetUintMask(LaneSizeInBitsFromFormat(vform)); + WriteXRegister(instr->GetRd(), value.first ? 
value.second : masked_src); + } +} + +void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& vdn = ReadVRegister(instr->GetRd()); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) { + case LASTA_v_p_z: + active_offset = 1; + break; + case LASTB_v_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + LogicVRegister dst(vdn); + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + dup_immediate(vform, vdn, 0); + dst.SetUint(vform, 0, value.second); + } +} + +void Simulator::VisitSVEExtractElementToGeneralRegister( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zm = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + int active_offset = -1; + switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) { + case LASTA_r_p_z: + active_offset = 1; + break; + case LASTB_r_p_z: + active_offset = 0; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (active_offset >= 0) { + std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset); + WriteXRegister(instr->GetRd(), value.second); + } +} + +void Simulator::VisitSVECompressActiveElements(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + + switch (instr->Mask(SVECompressActiveElementsMask)) { + case COMPACT_z_p_z: + compact(vform, zd, pg, zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated( + const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->GetPgLow8()); + SimVRegister z_result; + + switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { + case CPY_z_p_r: + dup_immediate(vform, + z_result, + ReadXRegister(instr->GetRn(), Reg31IsStackPointer)); + mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16)); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + + SimVRegister result; + switch (instr->Mask(SVECopyIntImm_PredicatedMask)) { + case CPY_z_p_i: { + // Use unsigned arithmetic to avoid undefined behaviour during the shift. 
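// Editorial note (not part of the patch): a worked example of the comment
// above. For `cpy z0.h, p0/z, #-1, lsl #8`, GetImmSVEIntWideSigned() returns
// -1, which the uint64_t below holds as 0xffffffffffffffff; bit 13 is set, so
// the unsigned left shift by 8 gives 0x...ff00, and each H lane receives
// 0xff00. Left-shifting a negative signed value instead would be undefined
// behaviour.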
+      uint64_t imm8 = instr->GetImmSVEIntWideSigned();
+      dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
+      break;
+    }
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  if (instr->ExtractBit(14) != 0) {
+    mov_merging(vform, zd, pg, result);
+  } else {
+    mov_zeroing(vform, zd, pg, result);
+  }
+}
+
+void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister result;
+
+  // In NEON, the chunk size in which elements are REVersed is in the
+  // instruction mnemonic, and the element size is attached to the register.
+  // SVE reverses the semantics; the mapping to logic functions below is to
+  // account for this.
+  VectorFormat chunk_form = instr->GetSVEVectorFormat();
+  VectorFormat element_form = kFormatUndefined;
+
+  switch (instr->Mask(SVEReverseWithinElementsMask)) {
+    case RBIT_z_p_z:
+      rbit(chunk_form, result, zn);
+      break;
+    case REVB_z_z:
+      VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
+                  (chunk_form == kFormatVnD));
+      element_form = kFormatVnB;
+      break;
+    case REVH_z_z:
+      VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
+      element_form = kFormatVnH;
+      break;
+    case REVW_z_z:
+      VIXL_ASSERT(chunk_form == kFormatVnD);
+      element_form = kFormatVnS;
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+
+  if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
+    VIXL_ASSERT(element_form != kFormatUndefined);
+    switch (chunk_form) {
+      case kFormatVnH:
+        rev16(element_form, result, zn);
+        break;
+      case kFormatVnS:
+        rev32(element_form, result, zn);
+        break;
+      case kFormatVnD:
+        rev64(element_form, result, zn);
+        break;
+      default:
+        VIXL_UNIMPLEMENTED();
+    }
+  }
+
+  mov_merging(chunk_form, zd, pg, result);
+}
+
+void Simulator::VisitSVEVectorSplice_Destructive(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zdn = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRn());
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+
+  switch (instr->Mask(SVEVectorSplice_DestructiveMask)) {
+    case SPLICE_z_p_zz_des:
+      splice(vform, zdn, pg, zdn, zm);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
+    case DUP_z_r:
+      dup_immediate(instr->GetSVEVectorFormat(),
+                    zd,
+                    ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
+    case INSR_z_v:
+      insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
+    case INSR_z_r:
+      insr(vform, zd, ReadXRegister(instr->GetRn()));
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+      break;
+  }
+}
+
+void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
+ SimVRegister& zd = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVEBroadcastIndexElementMask)) { + case DUP_z_zi: { + std::pair<int, int> index_and_lane_size = + instr->GetSVEPermuteIndexAndLaneSizeLog2(); + int index = index_and_lane_size.first; + int lane_size_in_bytes_log_2 = index_and_lane_size.second; + VectorFormat vform = + SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2); + if ((index < 0) || (index >= LaneCountFromFormat(vform))) { + // Out of bounds, set the destination register to zero. + dup_immediate(kFormatVnD, zd, 0); + } else { + dup_element(vform, zd, ReadVRegister(instr->GetRn()), index); + } + return; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + switch (instr->Mask(SVEReverseVectorElementsMask)) { + case REV_z_z: + rev(vform, zd, ReadVRegister(instr->GetRn())); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + switch (instr->Mask(SVEUnpackVectorElementsMask)) { + case SUNPKHI_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend); + break; + case SUNPKLO_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend); + break; + case UUNPKHI_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend); + break; + case UUNPKLO_z_z: + unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVETableLookup(const Instruction* instr) { + SimVRegister& zd = ReadVRegister(instr->GetRd()); + switch (instr->Mask(SVETableLookupMask)) { + case TBL_z_zz_1: + Table(instr->GetSVEVectorFormat(), + zd, + ReadVRegister(instr->GetRn()), + ReadVRegister(instr->GetRm())); + return; + default: + break; + } +} + +void Simulator::VisitSVEPredicateCount(const Instruction* instr) { + VectorFormat vform = instr->GetSVEVectorFormat(); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + + switch (instr->Mask(SVEPredicateCountMask)) { + case CNTP_r_p_p: { + WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateLogical(const Instruction* instr) { + Instr op = instr->Mask(SVEPredicateLogicalMask); + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pm = ReadPRegister(instr->GetPm()); + SimPRegister result; + switch (op) { + case ANDS_p_p_pp_z: + case AND_p_p_pp_z: + case BICS_p_p_pp_z: + case BIC_p_p_pp_z: + case EORS_p_p_pp_z: + case EOR_p_p_pp_z: + case NANDS_p_p_pp_z: + case NAND_p_p_pp_z: + case NORS_p_p_pp_z: + case NOR_p_p_pp_z: + case ORNS_p_p_pp_z: + case ORN_p_p_pp_z: + case ORRS_p_p_pp_z: + case ORR_p_p_pp_z: + SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op), + result, + pn, + pm); + break; + case SEL_p_p_pp: + sel(pd, pg, pn, pm); + return; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + mov_zeroing(pd, pg, result); + if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) { + PredTest(kFormatVnB, pg, pd); + } 
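// Editorial note (not part of the patch): PredTest implements the SVE
// flag-setting convention, sketched here for reference: N takes the value of
// the first active result bit, Z is set if no active result bit is set, C is
// set if the last active result bit is clear, and V is zero. For example,
// with lanes 0 and 2 active in pg and a result where only lane 0 is set:
// N = 1, Z = 0, C = 1, V = 0.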
+} + +void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) { + LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5)); + LogicPRegister pdn = ReadPRegister(instr->GetPd()); + switch (instr->Mask(SVEPredicateFirstActiveMask)) { + case PFIRST_p_p_p: + pfirst(pdn, pg, pdn); + // TODO: Is this broken when pg == pdn? + PredTest(kFormatVnB, pg, pdn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) { + // This group only contains PTRUE{S}, and there are no unallocated encodings. + VIXL_STATIC_ASSERT( + SVEPredicateInitializeMask == + (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit)); + VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) || + (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s)); + + LogicPRegister pdn = ReadPRegister(instr->GetPd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + + ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint()); + if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn); +} + +void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) { + // This group only contains PNEXT, and there are no unallocated encodings. + VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask); + VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p); + + LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5)); + LogicPRegister pdn = ReadPRegister(instr->GetPd()); + VectorFormat vform = instr->GetSVEVectorFormat(); + + pnext(vform, pdn, pg, pdn); + // TODO: Is this broken when pg == pdn? + PredTest(vform, pg, pdn); +} + +void Simulator::VisitSVEPredicateReadFromFFR_Predicated( + const Instruction* instr) { + LogicPRegister pd(ReadPRegister(instr->GetPd())); + LogicPRegister pg(ReadPRegister(instr->GetPn())); + FlagsUpdate flags = LeaveFlags; + switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) { + case RDFFR_p_p_f: + // Do nothing. 
+ break; + case RDFFRS_p_p_f: + flags = SetFlags; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + LogicPRegister ffr(ReadFFR()); + mov_zeroing(pd, pg, ffr); + + if (flags == SetFlags) { + PredTest(kFormatVnB, pg, pd); + } +} + +void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated( + const Instruction* instr) { + LogicPRegister pd(ReadPRegister(instr->GetPd())); + LogicPRegister ffr(ReadFFR()); + switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) { + case RDFFR_p_f: + mov(pd, ffr); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateTest(const Instruction* instr) { + switch (instr->Mask(SVEPredicateTestMask)) { + case PTEST_p_p: + PredTest(kFormatVnB, + ReadPRegister(instr->ExtractBits(13, 10)), + ReadPRegister(instr->GetPn())); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPredicateZero(const Instruction* instr) { + switch (instr->Mask(SVEPredicateZeroMask)) { + case PFALSE_p: + pfalse(ReadPRegister(instr->GetPd())); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEPropagateBreak(const Instruction* instr) { + SimPRegister& pd = ReadPRegister(instr->GetPd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimPRegister& pn = ReadPRegister(instr->GetPn()); + SimPRegister& pm = ReadPRegister(instr->GetPm()); + + bool set_flags = false; + switch (instr->Mask(SVEPropagateBreakMask)) { + case BRKPAS_p_p_pp: + set_flags = true; + VIXL_FALLTHROUGH(); + case BRKPA_p_p_pp: + brkpa(pd, pg, pn, pm); + break; + case BRKPBS_p_p_pp: + set_flags = true; + VIXL_FALLTHROUGH(); + case BRKPB_p_p_pp: + brkpb(pd, pg, pn, pm); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (set_flags) { + PredTest(kFormatVnB, pg, pd); + } +} + +void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) { + uint64_t length = 0; + switch (instr->Mask(SVEStackFrameAdjustmentMask)) { + case ADDPL_r_ri: + length = GetPredicateLengthInBytes(); + break; + case ADDVL_r_ri: + length = GetVectorLengthInBytes(); + break; + default: + VIXL_UNIMPLEMENTED(); + } + uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer); + WriteXRegister(instr->GetRd(), + base + (length * instr->GetImmSVEVLScale()), + LogRegWrites, + Reg31IsStackPointer); +} + +void Simulator::VisitSVEStackFrameSize(const Instruction* instr) { + int64_t scale = instr->GetImmSVEVLScale(); + + switch (instr->Mask(SVEStackFrameSizeMask)) { + case RDVL_r_i: + WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale); + break; + default: + VIXL_UNIMPLEMENTED(); + } +} + +void Simulator::VisitSVEVectorSelect(const Instruction* instr) { + // The only instruction in this group is `sel`, and there are no unused + // encodings. 
+ VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz); + + VectorFormat vform = instr->GetSVEVectorFormat(); + SimVRegister& zd = ReadVRegister(instr->GetRd()); + SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10)); + SimVRegister& zn = ReadVRegister(instr->GetRn()); + SimVRegister& zm = ReadVRegister(instr->GetRm()); + + sel(vform, zd, pg, zn, zm); +} + +void Simulator::VisitSVEFFRInitialise(const Instruction* instr) { + switch (instr->Mask(SVEFFRInitialiseMask)) { + case SETFFR_f: { + LogicPRegister ffr(ReadFFR()); + ffr.SetAllBits(); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) { + switch (instr->Mask(SVEFFRWriteFromPredicateMask)) { + case WRFFR_f_p: { + SimPRegister pn(ReadPRegister(instr->GetPn())); + bool last_active = true; + for (unsigned i = 0; i < pn.GetSizeInBits(); i++) { + bool active = pn.GetBit(i); + if (active && !last_active) { + // `pn` is non-monotonic. This is UNPREDICTABLE. + VIXL_ABORT(); + } + last_active = active; + } + mov(ReadFFR(), pn); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } +} + +void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) { + case LD1B_z_p_bi_u8: + case LD1B_z_p_bi_u16: + case LD1B_z_p_bi_u32: + case LD1B_z_p_bi_u64: + case LD1H_z_p_bi_u16: + case LD1H_z_p_bi_u32: + case LD1H_z_p_bi_u64: + case LD1W_z_p_bi_u32: + case LD1W_z_p_bi_u64: + case LD1D_z_p_bi_u64: + is_signed = false; + break; + case LD1SB_z_p_bi_s16: + case LD1SB_z_p_bi_s32: + case LD1SB_z_p_bi_s64: + case LD1SH_z_p_bi_s32: + case LD1SH_z_p_bi_s64: + case LD1SW_z_p_bi_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. + VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int vl = GetVectorLengthInBytes(); + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); + int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2; + uint64_t offset = + (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); +} + +void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar( + const Instruction* instr) { + bool is_signed; + switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) { + case LD1B_z_p_br_u8: + case LD1B_z_p_br_u16: + case LD1B_z_p_br_u32: + case LD1B_z_p_br_u64: + case LD1H_z_p_br_u16: + case LD1H_z_p_br_u32: + case LD1H_z_p_br_u64: + case LD1W_z_p_br_u32: + case LD1W_z_p_br_u64: + case LD1D_z_p_br_u64: + is_signed = false; + break; + case LD1SB_z_p_br_s16: + case LD1SB_z_p_br_s32: + case LD1SB_z_p_br_s64: + case LD1SH_z_p_br_s32: + case LD1SH_z_p_br_s64: + case LD1SW_z_p_br_s64: + is_signed = true; + break; + default: + // This encoding group is complete, so no other values should be possible. 
+ VIXL_UNREACHABLE(); + is_signed = false; + break; + } + + int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed); + int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed); + VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2); + VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2); + uint64_t offset = ReadXRegister(instr->GetRm()); + offset <<= msize_in_bytes_log2; + LogicSVEAddressVector addr(ReadXRegister(instr->GetRn()) + offset); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); +} void Simulator::DoUnreachable(const Instruction* instr) { VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) && diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 7cb7419a..1a89dff7 100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -37,7 +37,6 @@ #include "cpu-features-auditor-aarch64.h" #include "disasm-aarch64.h" #include "instructions-aarch64.h" -#include "instrument-aarch64.h" #include "simulator-constants-aarch64.h" #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 @@ -77,6 +76,22 @@ class Memory { return value; } + template <typename A> + static uint64_t Read(int size_in_bytes, A address) { + switch (size_in_bytes) { + case 1: + return Read<uint8_t>(address); + case 2: + return Read<uint16_t>(address); + case 4: + return Read<uint32_t>(address); + case 8: + return Read<uint64_t>(address); + } + VIXL_UNREACHABLE(); + return 0; + } + template <typename T, typename A> static void Write(A address, T value) { address = AddressUntag(address); @@ -87,19 +102,33 @@ class Memory { } }; -// Represent a register (r0-r31, v0-v31). -template <int kSizeInBytes> +// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15). +template <unsigned kMaxSizeInBits> class SimRegisterBase { public: - SimRegisterBase() : written_since_last_log_(false) {} + static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte; + VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits); + + SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); } + + unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; } + unsigned GetSizeInBytes() const { return size_in_bytes_; } + + void SetSizeInBytes(unsigned size_in_bytes) { + VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes); + size_in_bytes_ = size_in_bytes; + } + void SetSizeInBits(unsigned size_in_bits) { + VIXL_ASSERT(size_in_bits <= kMaxSizeInBits); + VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); + SetSizeInBytes(size_in_bits / kBitsPerByte); + } // Write the specified value. The value is zero-extended if necessary. template <typename T> void Write(T new_value) { - if (sizeof(new_value) < kSizeInBytes) { - // All AArch64 registers are zero-extending. - memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value)); - } + // All AArch64 registers are zero-extending. + if (sizeof(new_value) < GetSizeInBytes()) Clear(); WriteLane(new_value, 0); NotifyRegisterWrite(); } @@ -108,6 +137,11 @@ class SimRegisterBase { Write(new_value); } + void Clear() { + memset(value_, 0, kMaxSizeInBytes); + NotifyRegisterWrite(); + } + // Insert a typed value into a register, leaving the rest of the register // unchanged. 
The lane parameter indicates where in the register the value
  // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
@@ -137,6 +171,17 @@ class SimRegisterBase {
     return GetLane(lane);
   }
 
+  // Get the value of a specific bit, indexed from the least-significant bit of
+  // lane 0.
+  bool GetBit(int bit) const {
+    int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte);
+    int byte = bit / (sizeof(value_[0]) * kBitsPerByte);
+    return ((value_[byte] >> bit_in_byte) & 1) != 0;
+  }
+
+  // Return a pointer to the raw, underlying byte array.
+  const uint8_t* GetBytes() const { return value_; }
+
   // TODO: Make this return a map of updated bytes, so that we can highlight
   // updated lanes for load-and-insert. (That never happens for scalar code, but
   // NEON has some instructions that can update individual lanes.)
@@ -145,7 +190,9 @@
   void NotifyRegisterLogged() { written_since_last_log_ = false; }
 
  protected:
-  uint8_t value_[kSizeInBytes];
+  uint8_t value_[kMaxSizeInBytes];
+
+  unsigned size_in_bytes_;
 
   // Helpers to aid with register tracing.
   bool written_since_last_log_;
@@ -156,38 +203,152 @@
   template <typename T>
   void ReadLane(T* dst, int lane) const {
     VIXL_ASSERT(lane >= 0);
-    VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= kSizeInBytes);
+    VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes());
     memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst));
   }
 
   template <typename T>
   void WriteLane(T src, int lane) {
     VIXL_ASSERT(lane >= 0);
-    VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= kSizeInBytes);
+    VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes());
     memcpy(&value_[lane * sizeof(src)], &src, sizeof(src));
   }
+
+  // The default ReadLane and WriteLane methods assume that what we are copying
+  // is "trivially copyable", and use memcpy. We have to provide alternative
+  // implementations for SimFloat16, which cannot be copied this way.
+
+  void ReadLane(vixl::internal::SimFloat16* dst, int lane) const {
+    uint16_t rawbits;
+    ReadLane(&rawbits, lane);
+    *dst = RawbitsToFloat16(rawbits);
+  }
+
+  void WriteLane(vixl::internal::SimFloat16 src, int lane) {
+    WriteLane(Float16ToRawbits(src), lane);
+  }
+};
+
+typedef SimRegisterBase<kXRegSize> SimRegister;      // r0-r31
+typedef SimRegisterBase<kPRegMaxSize> SimPRegister;  // p0-p15
+// FFR has the same format as a predicate register.
+typedef SimPRegister SimFFRRegister;
+
+// v0-v31 and z0-z31
+class SimVRegister : public SimRegisterBase<kZRegMaxSize> {
+ public:
+  SimVRegister() : SimRegisterBase<kZRegMaxSize>(), accessed_as_z_(false) {}
+
+  void NotifyAccessAsZ() { accessed_as_z_ = true; }
+
+  void NotifyRegisterLogged() {
+    SimRegisterBase<kZRegMaxSize>::NotifyRegisterLogged();
+    accessed_as_z_ = false;
+  }
+
+  bool AccessedAsZSinceLastLog() const { return accessed_as_z_; }
+
+ private:
+  bool accessed_as_z_;
+};
+
+// Representation of an SVE predicate register.
+class LogicPRegister {
+ public:
+  inline LogicPRegister(
+      SimPRegister& other)  // NOLINT(runtime/references)(runtime/explicit)
+      : register_(other) {}
+
+  // Use a conveniently-sized 16-bit chunk: the minimum predicate length is
+  // 16 bits, and predicate lengths can only grow in multiples of 16 bits.
+  typedef uint16_t ChunkType;
+
+  // Assign a bit at the low end of the specified lane.
+  // The bit is zero-extended if necessary.
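// Editorial example (not part of the patch): with vform = kFormatVnS, each
// lane owns psize = 4 predicate bits, so lane_index 3 maps to bit_index 12,
// i.e. byte_index 1 and bit_offset 4. Only the lowest of the lane's four bits
// carries the active flag; ZeroExtend() (below) clears the other three.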
+  void SetActive(VectorFormat vform, int lane_index, bool value) {
+    int psize = LaneSizeInBytesFromFormat(vform);
+    int bit_index = lane_index * psize;
+    int byte_index = bit_index / kBitsPerByte;
+    int bit_offset = bit_index % kBitsPerByte;
+    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
+    register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value));
+  }
+
+  bool IsActive(VectorFormat vform, int lane_index) const {
+    int psize = LaneSizeInBytesFromFormat(vform);
+    int bit_index = lane_index * psize;
+    int byte_index = bit_index / kBitsPerByte;
+    int bit_offset = bit_index % kBitsPerByte;
+    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
+    return ExtractBit(byte, bit_offset);
+  }
+
+  // The accessors for bulk processing.
+  int GetChunkCount() const {
+    VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0);
+    return register_.GetSizeInBytes() / sizeof(ChunkType);
+  }
+
+  ChunkType GetChunk(int lane) const { return GetActiveMask<ChunkType>(lane); }
+
+  void SetChunk(int lane, ChunkType new_value) {
+    SetActiveMask(lane, new_value);
+  }
+
+  void SetAllBits() {
+    int chunk_size = sizeof(ChunkType) * kBitsPerByte;
+    ChunkType bits = GetUintMask(chunk_size);
+    for (int lane = 0;
+         lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
+         lane++) {
+      SetChunk(lane, bits);
+    }
+  }
+
+  template <typename T>
+  T GetActiveMask(int lane) const {
+    return register_.GetLane<T>(lane);
+  }
+
+  template <typename T>
+  void SetActiveMask(int lane, T new_value) {
+    register_.Insert<T>(lane, new_value);
+  }
+
+  void Clear() { register_.Clear(); }
+
+  bool Aliases(const LogicPRegister& other) const {
+    return &register_ == &other.register_;
+  }
+
+ private:
+  // The bit assignment is zero-extended to fill the size of the predicate
+  // element.
+  uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) {
+    VIXL_ASSERT(index >= 0);
+    VIXL_ASSERT(index + psize <= kBitsPerByte);
+    int bits = value ? 1 : 0;
+    switch (psize) {
+      case 1:
+        AssignBit(byte, index, bits);
+        break;
+      case 2:
+        AssignBits(byte, index, 0x03, bits);
+        break;
+      case 4:
+        AssignBits(byte, index, 0x0f, bits);
+        break;
+      case 8:
+        AssignBits(byte, index, 0xff, bits);
+        break;
+      default:
+        VIXL_UNREACHABLE();
+        return 0;
    }
+    return byte;
+  }
+
+  SimPRegister& register_;
+};
 
-typedef SimRegisterBase<kXRegSizeInBytes> SimRegister;   // r0-r31
-typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister;  // v0-v31
-
-// The default ReadLane and WriteLane methods assume what we are copying is
-// "trivially copyable" by using memcpy. We have to provide alternative
-// implementations for SimFloat16 which cannot be copied this way.
-
-template <>
-template <>
-inline void SimVRegister::ReadLane(vixl::internal::SimFloat16* dst,
-                                   int lane) const {
-  uint16_t rawbits;
-  ReadLane(&rawbits, lane);
-  *dst = RawbitsToFloat16(rawbits);
-}
-
-template <>
-template <>
-inline void SimVRegister::WriteLane(vixl::internal::SimFloat16 src, int lane) {
-  WriteLane(Float16ToRawbits(src), lane);
-}
 
 // Representation of a vector register, with typed getters and setters for lanes
 // and additional information to represent lane state.
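Stepping back from the diff for a moment: the predicate layout used by
LogicPRegister above (one predicate bit per byte of lane size, with the active
flag in the lowest bit of each lane's field) can be demonstrated in isolation.
A self-contained editorial sketch, using only the standard library; the helper
name is ours, not VIXL's:

    #include <cassert>
    #include <cstdint>

    // Mirror LogicPRegister::IsActive(): one predicate bit per byte of lane
    // size, with the active flag in the lowest bit of each lane's field.
    static bool IsLaneActive(const uint8_t* pred,
                             int lane_size_in_bytes,
                             int lane_index) {
      int bit_index = lane_index * lane_size_in_bytes;
      return ((pred[bit_index / 8] >> (bit_index % 8)) & 1) != 0;
    }

    int main() {
      // A 32-bit predicate governing the 32-bit (S) lanes of a 256-bit vector.
      uint8_t pred[4] = {0x11, 0x00, 0x01, 0x00};
      assert(IsLaneActive(pred, 4, 0));   // Bit 0 of byte 0.
      assert(IsLaneActive(pred, 4, 1));   // Bit 4 of byte 0.
      assert(!IsLaneActive(pred, 4, 2));  // Bit 0 of byte 1.
      assert(IsLaneActive(pred, 4, 4));   // Bit 0 of byte 2.
      return 0;
    }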
@@ -205,6 +366,7 @@ class LogicVRegister { } int64_t Int(VectorFormat vform, int index) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); int64_t element; switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -227,6 +389,7 @@ class LogicVRegister { } uint64_t Uint(VectorFormat vform, int index) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); uint64_t element; switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -260,6 +423,7 @@ class LogicVRegister { } void SetInt(VectorFormat vform, int index, int64_t value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, static_cast<int8_t>(value)); @@ -287,6 +451,7 @@ class LogicVRegister { } void SetUint(VectorFormat vform, int index, uint64_t value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, static_cast<uint8_t>(value)); @@ -313,7 +478,98 @@ class LogicVRegister { } } + void ReadIntFromMem(VectorFormat vform, + unsigned msize_in_bits, + int index, + uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + int64_t value; + switch (msize_in_bits) { + case 8: + value = Memory::Read<int8_t>(addr); + break; + case 16: + value = Memory::Read<int16_t>(addr); + break; + case 32: + value = Memory::Read<int32_t>(addr); + break; + case 64: + value = Memory::Read<int64_t>(addr); + break; + default: + VIXL_UNREACHABLE(); + return; + } + + unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(esize_in_bits >= msize_in_bits); + switch (esize_in_bits) { + case 8: + register_.Insert(index, static_cast<int8_t>(value)); + break; + case 16: + register_.Insert(index, static_cast<int16_t>(value)); + break; + case 32: + register_.Insert(index, static_cast<int32_t>(value)); + break; + case 64: + register_.Insert(index, static_cast<int64_t>(value)); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + + void ReadUintFromMem(VectorFormat vform, + unsigned msize_in_bits, + int index, + uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + uint64_t value; + switch (msize_in_bits) { + case 8: + value = Memory::Read<uint8_t>(addr); + break; + case 16: + value = Memory::Read<uint16_t>(addr); + break; + case 32: + value = Memory::Read<uint32_t>(addr); + break; + case 64: + value = Memory::Read<uint64_t>(addr); + break; + default: + VIXL_UNREACHABLE(); + return; + } + + unsigned esize_in_bits = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(esize_in_bits >= msize_in_bits); + switch (esize_in_bits) { + case 8: + register_.Insert(index, static_cast<uint8_t>(value)); + break; + case 16: + register_.Insert(index, static_cast<uint16_t>(value)); + break; + case 32: + register_.Insert(index, static_cast<uint32_t>(value)); + break; + case 64: + register_.Insert(index, static_cast<uint64_t>(value)); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); switch (LaneSizeInBitsFromFormat(vform)) { case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); @@ -334,6 +590,7 @@ class LogicVRegister { } void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); uint64_t value = Uint(vform, index); switch (LaneSizeInBitsFromFormat(vform)) { case 8: @@ -361,11 +618,20 @@ class LogicVRegister { 
register_.Insert(index, value); } - // When setting a result in a register of size less than Q, the top bits of - // the Q register must be cleared. + template <typename T> + void SetFloat(VectorFormat vform, int index, T value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + register_.Insert(index, value); + } + + // When setting a result in a register larger than the result itself, the top + // bits of the register must be cleared. void ClearForWrite(VectorFormat vform) const { + // SVE destinations write whole registers, so we have nothing to clear. + if (IsSVEFormat(vform)) return; + unsigned size = RegisterSizeInBytesFromFormat(vform); - for (unsigned i = size; i < kQRegSizeInBytes; i++) { + for (unsigned i = size; i < register_.GetSizeInBytes(); i++) { SetUint(kFormat16B, i, 0); } } @@ -481,15 +747,129 @@ class LogicVRegister { return *this; } + int LaneCountFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform); + } else { + return vixl::aarch64::LaneCountFromFormat(vform); + } + } + private: SimVRegister& register_; // Allocate one saturation state entry per lane; largest register is type Q, // and lanes can be a minimum of one byte wide. - Saturation saturated_[kQRegSizeInBytes]; + Saturation saturated_[kZRegMaxSizeInBytes]; // Allocate one rounding state entry per lane. - bool round_[kQRegSizeInBytes]; + bool round_[kZRegMaxSizeInBytes]; +}; + +// Represent an SVE addressing mode and abstract per-lane address generation to +// make iteration easy. +// +// Contiguous accesses are described with a simple base address, the memory +// occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in +// each struct (`SetRegCount()`). +// +// Scatter-gather accesses also require a SimVRegister and information about how +// to extract lanes from it. +class LogicSVEAddressVector { + public: + // scalar-plus-scalar + // scalar-plus-immediate + explicit LogicSVEAddressVector(uint64_t base) + : base_(base), + msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), + reg_count_(1), + vector_(NULL), + vector_form_(kFormatUndefined), + vector_mod_(NO_SVE_OFFSET_MODIFIER), + vector_shift_(0) {} + + // scalar-plus-vector + // vector-plus-immediate + // `base` should be the constant used for each element. That is, the value + // of `xn`, or `#<imm>`. + // `vector` should be the SimVRegister with offsets for each element. The + // vector format must be specified; SVE scatter/gather accesses typically + // support both 32-bit and 64-bit addressing. + // + // `mod` and `shift` correspond to the modifiers applied to each element in + // scalar-plus-vector forms, such as those used for unpacking and + // sign-extension. They are not used for vector-plus-immediate. + LogicSVEAddressVector(uint64_t base, + const SimVRegister* vector, + VectorFormat vform, + SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER, + int shift = 0) + : base_(base), + msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), + reg_count_(1), + vector_(vector), + vector_form_(vform), + vector_mod_(mod), + vector_shift_(shift) {} + + // Set `msize` -- the memory occupied by each lane -- for address + // calculations. 
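// Editorial example (not part of the patch): for a contiguous ld2h at base
// 0x1000, msize is 2 bytes and the register count is 2, so each structure
// occupies 4 bytes. Assuming the contiguous GetStructAddress() implementation
// (defined out of line) returns base + (lane * reg_count * msize), lane 3 of
// the second register is accessed at 0x1000 + (3 * 4) + (1 * 2) = 0x100e, via
// GetElementAddress() below.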
+ void SetMsizeInBytesLog2(int msize_in_bytes_log2) { + VIXL_ASSERT(msize_in_bytes_log2 >= static_cast<int>(kBRegSizeInBytesLog2)); + VIXL_ASSERT(msize_in_bytes_log2 <= static_cast<int>(kDRegSizeInBytesLog2)); + msize_in_bytes_log2_ = msize_in_bytes_log2; + } + + bool HasMsize() const { + return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2; + } + + int GetMsizeInBytesLog2() const { + VIXL_ASSERT(HasMsize()); + return msize_in_bytes_log2_; + } + int GetMsizeInBitsLog2() const { + return GetMsizeInBytesLog2() + kBitsPerByteLog2; + } + + int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); } + int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); } + + void SetRegCount(int reg_count) { + VIXL_ASSERT(reg_count >= 1); // E.g. ld1/st1 + VIXL_ASSERT(reg_count <= 4); // E.g. ld4/st4 + reg_count_ = reg_count; + } + + int GetRegCount() const { return reg_count_; } + + // Full per-element address calculation for structured accesses. + // + // Note that the register number argument (`reg`) is zero-based. + uint64_t GetElementAddress(int lane, int reg) const { + VIXL_ASSERT(reg < GetRegCount()); + // Individual structures are always contiguous in memory, so this + // implementation works for both contiguous and scatter-gather addressing. + return GetStructAddress(lane) + (reg * GetMsizeInBytes()); + } + + // Full per-struct address calculation for structured accesses. + uint64_t GetStructAddress(int lane) const; + + bool IsContiguous() const { return vector_ == NULL; } + bool IsScatterGather() const { return !IsContiguous(); } + + private: + uint64_t base_; + int msize_in_bytes_log2_; + int reg_count_; + + const SimVRegister* vector_; + VectorFormat vector_form_; + SVEOffsetModifier vector_mod_; + int vector_shift_; + + static const int kUnknownMsizeInBytesLog2 = -1; }; // The proper way to initialize a simulated system register (such as NZCV) is as @@ -733,6 +1113,11 @@ class Simulator : public DecoderVisitor { VIXL_ASSERT(IsWordAligned(pc_)); pc_modified_ = false; + if (movprfx_ != NULL) { + VIXL_CHECK(pc_->CanTakeSVEMovprfx(movprfx_)); + movprfx_ = NULL; + } + // On guarded pages, if BType is not zero, take an exception on any // instruction other than BTI, PACI[AB]SP, HLT or BRK. if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) { @@ -774,13 +1159,6 @@ class Simulator : public DecoderVisitor { #undef DECLARE -#define DECLARE(A) \ - VIXL_NO_RETURN_IN_DEBUG_MODE virtual void Visit##A(const Instruction* instr) \ - VIXL_OVERRIDE; - VISITOR_LIST_THAT_DONT_RETURN_IN_DEBUG_MODE(DECLARE) -#undef DECLARE - - // Integer register accessors. // Basic accessor: Read the register as the specified type. @@ -827,6 +1205,13 @@ class Simulator : public DecoderVisitor { return ReadXRegister(code, r31mode); } + SimPRegister& ReadPRegister(unsigned code) { + VIXL_ASSERT(code < kNumberOfPRegisters); + return pregisters_[code]; + } + + SimFFRRegister& ReadFFR() { return ffr_register_; } + // As above, with parameterized size and return type. The value is // either zero-extended or truncated to fit, as required. template <typename T> @@ -877,6 +1262,10 @@ class Simulator : public DecoderVisitor { // Write 'value' into an integer register. The value is zero-extended. This // behaviour matches AArch64 register writes. 
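// Editorial example (not part of the patch): writing a 32-bit value
// zero-extends into the X register, so WriteRegister(0, UINT32_C(0xffffffff))
// leaves x0 holding 0x00000000ffffffff, matching `mov w0, #-1` on hardware.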
+ // + // SP may be specified in one of two ways: + // - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister) + // - (code == 31) && (r31mode == Reg31IsStackPointer) template <typename T> void WriteRegister(unsigned code, T value, @@ -896,20 +1285,25 @@ class Simulator : public DecoderVisitor { VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) || (sizeof(T) == kXRegSizeInBytes)); VIXL_ASSERT( - code < kNumberOfRegisters || + (code < kNumberOfRegisters) || ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode))); - if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { - return; - } - - if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) { - code = 31; + if (code == 31) { + if (r31mode == Reg31IsZeroRegister) { + // Discard writes to the zero register. + return; + } else { + code = kSPRegInternalCode; + } } - registers_[code].Write(value); + // registers_[31] is the stack pointer. + VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31); + registers_[code % kNumberOfRegisters].Write(value); - if (log_mode == LogRegWrites) LogRegister(code, r31mode); + if (log_mode == LogRegWrites) { + LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T))); + } } template <typename T> VIXL_DEPRECATED("WriteRegister", @@ -1015,6 +1409,11 @@ class Simulator : public DecoderVisitor { uint8_t val[kQRegSizeInBytes]; }; + // A structure for representing a SVE Z register. + struct zreg_t { + uint8_t val[kZRegMaxSizeInBytes]; + }; + // Basic accessor: read the register as the specified type. template <typename T> T ReadVRegister(unsigned code) const { @@ -1130,7 +1529,8 @@ class Simulator : public DecoderVisitor { (sizeof(value) == kHRegSizeInBytes) || (sizeof(value) == kSRegSizeInBytes) || (sizeof(value) == kDRegSizeInBytes) || - (sizeof(value) == kQRegSizeInBytes)); + (sizeof(value) == kQRegSizeInBytes) || + (sizeof(value) == kZRegMaxSizeInBytes)); VIXL_ASSERT(code < kNumberOfVRegisters); vregisters_[code].Write(value); @@ -1237,6 +1637,12 @@ class Simulator : public DecoderVisitor { WriteQRegister(code, value, log_mode); } + void WriteZRegister(unsigned code, + zreg_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + template <typename T> T ReadRegister(Register reg) const { return ReadRegister<T>(reg.GetCode(), Reg31IsZeroRegister); @@ -1357,14 +1763,16 @@ class Simulator : public DecoderVisitor { kPrintRegLaneSizeD = 3 << 0, kPrintRegLaneSizeX = kPrintRegLaneSizeD, kPrintRegLaneSizeQ = 4 << 0, + kPrintRegLaneSizeUnknown = 5 << 0, kPrintRegLaneSizeOffset = 0, kPrintRegLaneSizeMask = 7 << 0, - // The lane count. + // The overall register size. kPrintRegAsScalar = 0, kPrintRegAsDVector = 1 << 3, kPrintRegAsQVector = 2 << 3, + kPrintRegAsSVEVector = 3 << 3, kPrintRegAsVectorMask = 3 << 3, @@ -1372,37 +1780,98 @@ class Simulator : public DecoderVisitor { // S-, H-, and D-sized lanes.) kPrintRegAsFP = 1 << 5, - // Supported combinations. 
-
-  kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
-  kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
-  kPrintHReg = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
-
-  kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
-  kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
-  kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
-  kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
-  kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
-  kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
-  kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
-  kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
-  kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
-  kPrintReg1HFP = kPrintRegLaneSizeH | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintReg4HFP = kPrintRegLaneSizeH | kPrintRegAsDVector | kPrintRegAsFP,
-  kPrintReg8HFP = kPrintRegLaneSizeH | kPrintRegAsQVector | kPrintRegAsFP,
-  kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
-  kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
-  kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
-  kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
-  kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
-  kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
-  kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
+    // With this flag, print helpers won't check that the upper bits are zero.
+    // This also forces the register name to be printed with the `reg<msb:0>`
+    // format.
+    //
+    // The flag is supported with any PrintRegisterFormat other than those with
+    // kPrintRegAsSVEVector.
+    kPrintRegPartial = 1 << 6,
+
+// Supported combinations.
+// These exist so that they can be referred to by name, but also because
+// combining the flags above yields a plain integer, which C++ will not
+// implicitly convert back to the enum type; the combinations we want to use
+// therefore have to be enumerated explicitly.
+
+// Scalar formats.
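// Editorial note (not part of the patch): as an illustration of the macros
// below, VIXL_DECL_PRINT_REG_SCALAR(X) expands to
//   kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
//   kPrintXRegPartial = kPrintRegLaneSizeX | kPrintRegPartial
// and the _FP variant additionally declares kPrint<size>RegFP and
// kPrint<size>RegPartialFP combinations with kPrintRegAsFP set.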
+#define VIXL_DECL_PRINT_REG_SCALAR(size) \ + kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \ + kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_SCALAR_FP(size) \ + VIXL_DECL_PRINT_REG_SCALAR(size) \ + , kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP, \ + kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_SCALAR(W), + VIXL_DECL_PRINT_REG_SCALAR(X), + VIXL_DECL_PRINT_REG_SCALAR_FP(H), + VIXL_DECL_PRINT_REG_SCALAR_FP(S), + VIXL_DECL_PRINT_REG_SCALAR_FP(D), + VIXL_DECL_PRINT_REG_SCALAR(Q), +#undef VIXL_DECL_PRINT_REG_SCALAR +#undef VIXL_DECL_PRINT_REG_SCALAR_FP + +#define VIXL_DECL_PRINT_REG_NEON(count, type, size) \ + kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \ + kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size) \ + VIXL_DECL_PRINT_REG_NEON(count, type, size) \ + , kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \ + kPrintReg##count##type##PartialFP = \ + kPrintReg##count##type##Partial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_NEON(1, B, Scalar), + VIXL_DECL_PRINT_REG_NEON(8, B, DVector), + VIXL_DECL_PRINT_REG_NEON(16, B, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector), + VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector), + VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector), + VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar), +#undef VIXL_DECL_PRINT_REG_NEON +#undef VIXL_DECL_PRINT_REG_NEON_FP + +#define VIXL_DECL_PRINT_REG_SVE(type) \ + kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \ + kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_SVE_FP(type) \ + VIXL_DECL_PRINT_REG_SVE(type) \ + , kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \ + kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_SVE(B), + VIXL_DECL_PRINT_REG_SVE_FP(H), + VIXL_DECL_PRINT_REG_SVE_FP(S), + VIXL_DECL_PRINT_REG_SVE_FP(D), + VIXL_DECL_PRINT_REG_SVE(Q) +#undef VIXL_DECL_PRINT_REG_SVE +#undef VIXL_DECL_PRINT_REG_SVE_FP }; + // Return `format` with the kPrintRegPartial flag set. + PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) { + // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so the + // result of this cast will always be well-defined. + return static_cast<PrintRegisterFormat>(format | kPrintRegPartial); + } + + // For SVE formats, return the format of a Q register part of it. + PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + // Keep the FP and lane size fields. + int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP); + // The resulting format must always be partial, because we're not formatting + // the whole Z register. + q_format |= (kPrintRegAsQVector | kPrintRegPartial); + + // This cast is always safe because NEON QVector formats support every + // combination of FP and lane size that SVE formats do. 
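// Editorial example (not part of the patch): kPrintRegVnSFP keeps its lane
// size (S) and kPrintRegAsFP bits, gains kPrintRegAsQVector and
// kPrintRegPartial, and therefore comes out as kPrintReg4SPartialFP.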
+ return static_cast<PrintRegisterFormat>(q_format); + } + unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegLaneSizeMask) != kPrintRegLaneSizeUnknown); return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset; } @@ -1411,17 +1880,51 @@ class Simulator : public DecoderVisitor { } unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) { - if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2; - if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2; - - // Scalar types. - return GetPrintRegLaneSizeInBytesLog2(format); + switch (format & kPrintRegAsVectorMask) { + case kPrintRegAsScalar: + return GetPrintRegLaneSizeInBytesLog2(format); + case kPrintRegAsDVector: + return kDRegSizeInBytesLog2; + case kPrintRegAsQVector: + return kQRegSizeInBytesLog2; + default: + case kPrintRegAsSVEVector: + // We print SVE vectors in Q-sized chunks. These need special handling, + // and it's probably an error to call this function in that case. + VIXL_UNREACHABLE(); + return kQRegSizeInBytesLog2; + } } unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) { return 1 << GetPrintRegSizeInBytesLog2(format); } + unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) { + return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2; + } + + unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) { + return 1 << GetPrintRegSizeInBitsLog2(format); + } + + const char* GetPartialRegSuffix(PrintRegisterFormat format) { + switch (GetPrintRegSizeInBitsLog2(format)) { + case kBRegSizeLog2: + return "<7:0>"; + case kHRegSizeLog2: + return "<15:0>"; + case kSRegSizeLog2: + return "<31:0>"; + case kDRegSizeLog2: + return "<63:0>"; + case kQRegSizeLog2: + return "<127:0>"; + } + VIXL_UNREACHABLE(); + return "<UNKNOWN>"; + } + unsigned GetPrintRegLaneCount(PrintRegisterFormat format) { unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format); unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format); @@ -1429,6 +1932,21 @@ class Simulator : public DecoderVisitor { return 1 << (reg_size_log2 - lane_size_log2); } + uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) { + int print_as = format & kPrintRegAsVectorMask; + if (print_as == kPrintRegAsScalar) return 1; + + // Vector formats, including SVE formats printed in Q-sized chunks. + static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001}; + unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format); + VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks)); + uint16_t mask = masks[size_in_bytes_log2]; + + // Exclude lanes that aren't visible in D vectors. + if (print_as == kPrintRegAsDVector) mask &= 0x00ff; + return mask; + } + PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size, unsigned lane_size); @@ -1459,6 +1977,10 @@ class Simulator : public DecoderVisitor { return format; } + PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) { + return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size)); + } + template <typename T> PrintRegisterFormat GetPrintRegisterFormat(T value) { return GetPrintRegisterFormatForSize(sizeof(value)); @@ -1485,99 +2007,314 @@ class Simulator : public DecoderVisitor { // Print all registers of the specified types. void PrintRegisters(); void PrintVRegisters(); + void PrintZRegisters(); void PrintSystemRegisters(); // As above, but only print the registers that have been updated. 
void PrintWrittenRegisters(); void PrintWrittenVRegisters(); + void PrintWrittenPRegisters(); // As above, but respect LOG_REG and LOG_VREG. void LogWrittenRegisters() { - if (GetTraceParameters() & LOG_REGS) PrintWrittenRegisters(); + if (ShouldTraceRegs()) PrintWrittenRegisters(); } void LogWrittenVRegisters() { - if (GetTraceParameters() & LOG_VREGS) PrintWrittenVRegisters(); + if (ShouldTraceVRegs()) PrintWrittenVRegisters(); + } + void LogWrittenPRegisters() { + if (ShouldTraceVRegs()) PrintWrittenPRegisters(); } void LogAllWrittenRegisters() { LogWrittenRegisters(); LogWrittenVRegisters(); + LogWrittenPRegisters(); + } + + // The amount of space to leave for a register name. This is used to keep the + // values vertically aligned. The longest register name has the form + // "z31<2047:1920>". The total overall value indentation must also take into + // account the fixed formatting: "# {name}: 0x{value}". + static const int kPrintRegisterNameFieldWidth = 14; + + // Print whole, individual register values. + // - The format can be used to restrict how much of the register is printed, + // but such formats indicate that the unprinted high-order bits are zero and + // these helpers will assert that. + // - If the format includes the kPrintRegAsFP flag then human-friendly FP + // value annotations will be printed. + // - The suffix can be used to add annotations (such as memory access + // details), or to suppress the newline. + void PrintRegister(int code, + PrintRegisterFormat format = kPrintXReg, + const char* suffix = "\n"); + void PrintVRegister(int code, + PrintRegisterFormat format = kPrintReg1Q, + const char* suffix = "\n"); + // PrintZRegister and PrintPRegister print over several lines, so they cannot + // allow the suffix to be overridden. + void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); + void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); + void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ); + // Print a single Q-sized part of a Z register, or the corresponding two-byte + // part of a P register. These print single lines, and therefore allow the + // suffix to be overridden. The format must include the kPrintRegPartial flag. + void PrintPartialZRegister(int code, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + void PrintPartialPRegister(int code, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + void PrintPartialPRegister(const char* name, + const SimPRegister& reg, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + + // Like Print*Register (above), but respect trace parameters. + void LogRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceRegs()) PrintRegister(code, format); + } + void LogVRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintVRegister(code, format); + } + void LogZRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintZRegister(code, format); + } + void LogPRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintPRegister(code, format); + } + void LogFFR(PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintFFR(format); } - // Print individual register values (after update). 
- void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer); - void PrintVRegister(unsigned code, PrintRegisterFormat format); + // Other state updates, including system registers. void PrintSystemRegister(SystemRegister id); void PrintTakenBranch(const Instruction* target); + void LogSystemRegister(SystemRegister id) { + if (ShouldTraceSysRegs()) PrintSystemRegister(id); + } + void LogTakenBranch(const Instruction* target) { + if (ShouldTraceBranches()) PrintTakenBranch(target); + } - // Like Print* (above), but respect GetTraceParameters(). - void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) { - if (GetTraceParameters() & LOG_REGS) PrintRegister(code, r31mode); + // Trace memory accesses. + + // Common, contiguous register accesses (such as for scalars). + // The *Write variants automatically set kPrintRegPartial on the format. + void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address); + void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); + // Simple, unpredicated SVE accesses always access the whole vector, and never + // know the lane type, so there's no need to accept a `format`. + void PrintZRead(int rt_code, uintptr_t address) { + vregisters_[rt_code].NotifyRegisterLogged(); + PrintZAccess(rt_code, "<-", address); } - void LogVRegister(unsigned code, PrintRegisterFormat format) { - if (GetTraceParameters() & LOG_VREGS) PrintVRegister(code, format); + void PrintZWrite(int rt_code, uintptr_t address) { + PrintZAccess(rt_code, "->", address); } - void LogSystemRegister(SystemRegister id) { - if (GetTraceParameters() & LOG_SYSREGS) PrintSystemRegister(id); + void PrintPRead(int rt_code, uintptr_t address) { + pregisters_[rt_code].NotifyRegisterLogged(); + PrintPAccess(rt_code, "<-", address); + } + void PrintPWrite(int rt_code, uintptr_t address) { + PrintPAccess(rt_code, "->", address); } - void LogTakenBranch(const Instruction* target) { - if (GetTraceParameters() & LOG_BRANCH) PrintTakenBranch(target); - } - - // Print memory accesses. - void PrintRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format); - void PrintWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format); - void PrintVRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane); - void PrintVWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane); // Like Print* (above), but respect GetTraceParameters(). 
- void LogRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - if (GetTraceParameters() & LOG_REGS) PrintRead(address, reg_code, format); - } - void LogWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format) { - if (GetTraceParameters() & LOG_WRITE) PrintWrite(address, reg_code, format); - } - void LogVRead(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane = 0) { - if (GetTraceParameters() & LOG_VREGS) { - PrintVRead(address, reg_code, format, lane); + void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceRegs()) PrintRead(rt_code, format, address); + } + void LogExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address) { + if (ShouldTraceRegs()) { + PrintExtendingRead(rt_code, format, access_size_in_bytes, address); } } - void LogVWrite(uintptr_t address, - unsigned reg_code, - PrintRegisterFormat format, - unsigned lane = 0) { - if (GetTraceParameters() & LOG_WRITE) { - PrintVWrite(address, reg_code, format, lane); - } + void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceWrites()) PrintWrite(rt_code, format, address); + } + void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceVRegs()) PrintVRead(rt_code, format, address); + } + void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceWrites()) PrintVWrite(rt_code, format, address); + } + void LogZRead(int rt_code, uintptr_t address) { + if (ShouldTraceVRegs()) PrintZRead(rt_code, address); + } + void LogZWrite(int rt_code, uintptr_t address) { + if (ShouldTraceWrites()) PrintZWrite(rt_code, address); + } + void LogPRead(int rt_code, uintptr_t address) { + if (ShouldTraceVRegs()) PrintPRead(rt_code, address); + } + void LogPWrite(int rt_code, uintptr_t address) { + if (ShouldTraceWrites()) PrintPWrite(rt_code, address); + } + + // Helpers for the above, where the access operation is parameterised. + // - For loads, set op = "<-". + // - For stores, set op = "->". + void PrintAccess(int rt_code, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + void PrintVAccess(int rt_code, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Simple, unpredicated SVE accesses always access the whole vector, and never + // know the lane type, so these don't accept a `format`. + void PrintZAccess(int rt_code, const char* op, uintptr_t address); + void PrintPAccess(int rt_code, const char* op, uintptr_t address); + + // Multiple-structure accesses. + void PrintVStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Single-structure (single-lane) accesses. + void PrintVSingleStructAccess(int rt_code, + int reg_count, + int lane, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Replicating accesses. + void PrintVReplicatingStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + + // Multiple-structure accesses. + void PrintZStructAccess(int rt_code, + int reg_count, + const LogicPRegister& pg, + PrintRegisterFormat format, + int msize_in_bytes, + const char* op, + const LogicSVEAddressVector& addr); + + // Register-printing helper for all structured accessors. + // + // All lanes (according to `format`) are printed, but lanes indicated by + // `focus_mask` are of particular interest. 
Each bit corresponds to a byte in + // the printed register, in a manner similar to SVE's predicates. Currently, + // this is used to determine when to print human-readable FP annotations. + void PrintVRegistersForStructuredAccess(int rt_code, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format); + + // As for the VRegister variant, but print partial Z register names. + void PrintZRegistersForStructuredAccess(int rt_code, + int q_index, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format); + + // Print part of a memory access. This should be used for annotating + // non-trivial accesses, such as structured or sign-extending loads. Call + // Print*Register (or Print*RegistersForStructuredAccess), then + // PrintPartialAccess for each contiguous access that makes up the + // instruction. + // + // access_mask: + // The lanes to be printed. Each bit corresponds to a byte in the printed + // register, in a manner similar to SVE's predicates, except that the + // lane size is not respected when interpreting access_mask: unaligned bits + // must be zeroed. + // + // This function asserts that this mask is non-zero. + // + // future_access_mask: + // The lanes to be printed by a future invocation. This must be specified + // because vertical lines are drawn for partial accesses that haven't yet + // been printed. The format is the same as for access_mask. + // + // If a lane is active in both `access_mask` and `future_access_mask`, + // `access_mask` takes precedence. + // + // struct_element_count: + // The number of elements in each structure. For non-structured accesses, + // set this to one. Along with lane_size_in_bytes, this is used to determine + // the size of each access, and to format the accessed value. + // + // op: + // For stores, use "->". For loads, use "<-". + // + // address: + // The address of this partial access. (Not the base address of the whole + // instruction.) The traced value is read from this address (according to + // struct_element_count and lane_size_in_bytes) so it must be accessible, and when + // tracing stores, the store must have been executed before this function + // is called. + // + // reg_size_in_bytes: + // The size of the register being accessed. This helper is usually used + // for V registers or Q-sized chunks of Z registers, so that is the + // default, but it is possible to use this to annotate X register + // accesses by specifying kXRegSizeInBytes. + // + // The return value is a future_access_mask suitable for the next iteration, + // so that it is possible to execute this in a loop, until the mask is zero. + // Note that access_mask must still be updated by the caller for each call. + uint16_t PrintPartialAccess(uint16_t access_mask, + uint16_t future_access_mask, + int struct_element_count, + int lane_size_in_bytes, + const char* op, + uintptr_t address, + int reg_size_in_bytes = kQRegSizeInBytes); + + // Print an abstract register value. This works for all register types, and + // can print parts of registers. This exists to ensure consistent formatting + // of values. + void PrintRegisterValue(const uint8_t* value, + int value_size, + PrintRegisterFormat format); + template <typename T> + void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) { + PrintRegisterValue(sim_register.GetBytes(), + std::min(sim_register.GetSizeInBytes(), + kQRegSizeInBytes), + format); } - // Helper functions for register tracing.
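
The return-value convention documented above lets the caller drive PrintPartialAccess in a loop until the mask is exhausted. A toy stand-in showing the shape of that loop (a sketch; PrintPartialAccessSketch is hypothetical and only mimics the mask bookkeeping, not the simulator's private member):

    #include <cstdint>
    #include <cstdio>

    // Stand-in: prints one contiguous chunk and, like the real helper,
    // returns the mask of lanes that future calls will print.
    static uint16_t PrintPartialAccessSketch(uint16_t access_mask,
                                             uint16_t future_access_mask) {
      std::printf("access covers lane bytes 0x%04x\n",
                  static_cast<unsigned>(access_mask));
      return future_access_mask;
    }

    int main() {
      // A two-part access: bytes 0-3 first, then bytes 8-11 of a Q-sized chunk.
      uint16_t mask = 0x000f;
      uint16_t future = 0x0f00;
      while (mask != 0) {
        mask = PrintPartialAccessSketch(mask, future);
        future = 0;  // No accesses remain after the second part.
      }
      return 0;
    }
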
- void PrintRegisterRawHelper(unsigned code, - Reg31Mode r31mode, - int size_in_bytes = kXRegSizeInBytes); - void PrintVRegisterRawHelper(unsigned code, - int bytes = kQRegSizeInBytes, - int lsb = 0); - void PrintVRegisterFPHelper(unsigned code, - unsigned lane_size_in_bytes, - int lane_count = 1, - int rightmost_lane = 0); + // As above, but format as an SVE predicate value, using binary notation with + // spaces between each bit so that they align with the Z register bytes that + // they predicate. + void PrintPRegisterValue(uint16_t value); + + void PrintRegisterValueFPAnnotations(const uint8_t* value, + uint16_t lane_mask, + PrintRegisterFormat format); + template <typename T> + void PrintRegisterValueFPAnnotations(const T& sim_register, + uint16_t lane_mask, + PrintRegisterFormat format) { + PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format); + } + template <typename T> + void PrintRegisterValueFPAnnotations(const T& sim_register, + PrintRegisterFormat format) { + PrintRegisterValueFPAnnotations(sim_register.GetBytes(), + GetPrintRegLaneMask(format), + format); + } VIXL_NO_RETURN void DoUnreachable(const Instruction* instr); void DoTrace(const Instruction* instr); @@ -1587,10 +2324,13 @@ class Simulator : public DecoderVisitor { Reg31Mode mode = Reg31IsZeroRegister); static const char* XRegNameForCode(unsigned code, Reg31Mode mode = Reg31IsZeroRegister); + static const char* BRegNameForCode(unsigned code); static const char* HRegNameForCode(unsigned code); static const char* SRegNameForCode(unsigned code); static const char* DRegNameForCode(unsigned code); static const char* VRegNameForCode(unsigned code); + static const char* ZRegNameForCode(unsigned code); + static const char* PRegNameForCode(unsigned code); bool IsColouredTrace() const { return coloured_trace_; } VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) { @@ -1609,18 +2349,28 @@ class Simulator : public DecoderVisitor { return GetTraceParameters(); } + bool ShouldTraceWrites() const { + return (GetTraceParameters() & LOG_WRITE) != 0; + } + bool ShouldTraceRegs() const { + return (GetTraceParameters() & LOG_REGS) != 0; + } + bool ShouldTraceVRegs() const { + return (GetTraceParameters() & LOG_VREGS) != 0; + } + bool ShouldTraceSysRegs() const { + return (GetTraceParameters() & LOG_SYSREGS) != 0; + } + bool ShouldTraceBranches() const { + return (GetTraceParameters() & LOG_BRANCH) != 0; + } + void SetTraceParameters(int parameters); VIXL_DEPRECATED("SetTraceParameters", void set_trace_parameters(int parameters)) { SetTraceParameters(parameters); } - void SetInstructionStats(bool value); - VIXL_DEPRECATED("SetInstructionStats", - void set_instruction_stats(bool value)) { - SetInstructionStats(value); - } - // Clear the simulated local monitor to force the next store-exclusive // instruction to fail. void ClearLocalMonitor() { local_monitor_.Clear(); } @@ -1803,6 +2553,92 @@ class Simulator : public DecoderVisitor { }; #endif + // Configure the simulated value of 'VL', which is the size of a Z register. + // Because this cannot occur during a program's lifetime, this function also + // resets the SVE registers. 
+ void SetVectorLengthInBits(unsigned vector_length); + + unsigned GetVectorLengthInBits() const { return vector_length_; } + unsigned GetVectorLengthInBytes() const { + VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0); + return vector_length_ / kBitsPerByte; + } + unsigned GetPredicateLengthInBits() const { + VIXL_ASSERT((GetVectorLengthInBits() % kZRegBitsPerPRegBit) == 0); + return GetVectorLengthInBits() / kZRegBitsPerPRegBit; + } + unsigned GetPredicateLengthInBytes() const { + VIXL_ASSERT((GetVectorLengthInBytes() % kZRegBitsPerPRegBit) == 0); + return GetVectorLengthInBytes() / kZRegBitsPerPRegBit; + } + + unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return GetVectorLengthInBits(); + } else { + return vixl::aarch64::RegisterSizeInBitsFromFormat(vform); + } + } + + unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const { + unsigned size_in_bits = RegisterSizeInBitsFromFormat(vform); + VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); + return size_in_bits / kBitsPerByte; + } + + int LaneCountFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform); + } else { + return vixl::aarch64::LaneCountFromFormat(vform); + } + } + + bool IsFirstActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (mask.IsActive(vform, i)) { + return bits.IsActive(vform, i); + } + } + return false; + } + + bool AreNoneActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (mask.IsActive(vform, i) && bits.IsActive(vform, i)) { + return false; + } + } + return true; + } + + bool IsLastActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + if (mask.IsActive(vform, i)) { + return bits.IsActive(vform, i); + } + } + return false; + } + + void PredTest(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + ReadNzcv().SetN(IsFirstActive(vform, mask, bits)); + ReadNzcv().SetZ(AreNoneActive(vform, mask, bits)); + ReadNzcv().SetC(!IsLastActive(vform, mask, bits)); + ReadNzcv().SetV(0); + LogSystemRegister(NZCV); + } + + SimPRegister& GetPTrue() { return pregister_all_true_; } + protected: const char* clr_normal; const char* clr_flag_name; @@ -1811,6 +2647,8 @@ class Simulator : public DecoderVisitor { const char* clr_reg_value; const char* clr_vreg_name; const char* clr_vreg_value; + const char* clr_preg_name; + const char* clr_preg_value; const char* clr_memory_address; const char* clr_warning; const char* clr_warning_message; @@ -1818,6 +2656,13 @@ class Simulator : public DecoderVisitor { const char* clr_branch_marker; // Simulation helpers ------------------------------------ + + void ResetSystemRegisters(); + void ResetRegisters(); + void ResetVRegisters(); + void ResetPRegisters(); + void ResetFFR(); + bool ConditionPassed(Condition cond) { switch (cond) { case eq: @@ -1907,7 +2752,7 @@ class Simulator : public DecoderVisitor { } int64_t ShiftOperand(unsigned reg_size, - int64_t value, + uint64_t value, Shift shift_type, unsigned amount) const; int64_t ExtendValue(unsigned reg_width, @@ -1919,6 +2764,11 @@ class Simulator : public DecoderVisitor { void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr); void ld1(VectorFormat vform, LogicVRegister dst, int 
index, uint64_t addr); void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr); + void ld1r(VectorFormat vform, + VectorFormat unpack_vform, + LogicVRegister dst, + uint64_t addr, + bool is_signed = false); void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, @@ -2020,16 +2870,43 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + // Add `value` to each lane of `src1`, treating `value` as unsigned for the + // purposes of setting the saturation flags. + LogicVRegister add_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value); LogicVRegister addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + LogicPRegister brka(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkn(LogicPRegister pdm, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkpa(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm); + LogicPRegister brkpb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm); + // dst = srca + src1 * src2 LogicVRegister mla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); + // dst = srca - src1 * src2 LogicVRegister mls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister mul(VectorFormat vform, @@ -2055,6 +2932,14 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + LogicVRegister sdiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister udiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform, LogicVRegister dst, @@ -2101,6 +2986,10 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); + LogicVRegister smulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); LogicVRegister smull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2161,6 +3050,10 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); + LogicVRegister umulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); LogicVRegister sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2225,6 +3118,12 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + // Subtract `value` from each lane of `src1`, treating `value` as unsigned for + // the purposes of setting the saturation flags. 
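
The comments on add_uint and sub_uint here hinge on a distinction that is easy to miss: a 64-bit value of all ones is -1 when read as signed, but an enormous addend when read as unsigned, and only the unsigned reading feeds the saturation flags. A standalone illustration of why the two views differ (assumed semantics, not VIXL internals):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t lane = 5;
      uint64_t value = UINT64_C(0xffffffffffffffff);
      uint64_t result = lane + value;  // Modular arithmetic: wraps to 4.
      // Signed view: 5 + (-1) = 4, no overflow at all. Unsigned view: the true
      // sum exceeds UINT64_MAX (the result wrapped below `lane`), so an
      // unsigned saturation event occurred.
      bool unsigned_saturation = result < lane;
      std::printf("result=%llu saturated=%d\n",
                  static_cast<unsigned long long>(result), unsigned_saturation);
      return 0;
    }
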
+ LogicVRegister sub_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value); LogicVRegister and_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2267,6 +3166,9 @@ class Simulator : public DecoderVisitor { LogicVRegister clz(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister cnot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); LogicVRegister cnt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -2278,8 +3180,11 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister rev(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src, - int revSize); + const LogicVRegister& src); + LogicVRegister rev_byte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rev_size); LogicVRegister rev16(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -2327,6 +3232,7 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, + const LogicVRegister& acc, int index, int rot); LogicVRegister fcmla(VectorFormat vform, @@ -2335,17 +3241,25 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2, int index, int rot); - template <typename T> LogicVRegister fcmla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, + const LogicVRegister& acc, int rot); - LogicVRegister fcmla(VectorFormat vform, + template <typename T> + LogicVRegister fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister index(VectorFormat vform, LogicVRegister dst, - const LogicVRegister& src1, - const LogicVRegister& src2, - int rot); + uint64_t start, + uint64_t step); LogicVRegister ins_element(VectorFormat vform, LogicVRegister dst, int dst_index, @@ -2355,13 +3269,36 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, int dst_index, uint64_t imm); + LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index); + LogicVRegister dup_elements_to_segments(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int src_index); LogicVRegister dup_immediate(VectorFormat vform, LogicVRegister dst, uint64_t imm); + LogicVRegister mov(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src); + LogicVRegister mov_merging(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src); + LogicVRegister mov_zeroing(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src); + LogicPRegister mov_merging(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + LogicPRegister mov_zeroing(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm); LogicVRegister orr(VectorFormat vform, @@ -2376,6 +3313,32 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); + // Perform a 
"conditional last" operation. The first part of the pair is true + // if any predicate lane is active, false otherwise. The second part takes the + // value of the last active (plus offset) lane, or last (plus offset) lane if + // none active. + std::pair<bool, uint64_t> clast(VectorFormat vform, + const LogicPRegister& pg, + const LogicVRegister& src2, + int offset_from_last_active); + LogicVRegister compact(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister splice(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sel(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicPRegister sel(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src1, + const LogicPRegister& src2); LogicVRegister sminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2416,6 +3379,7 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister sminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max); LogicVRegister smaxv(VectorFormat vform, @@ -2436,6 +3400,14 @@ class Simulator : public DecoderVisitor { LogicVRegister sxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister uxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits); + LogicVRegister sxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits); LogicVRegister tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, @@ -2460,6 +3432,10 @@ class Simulator : public DecoderVisitor { const LogicVRegister& ind); LogicVRegister Table(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& src, + const LogicVRegister& tab); + LogicVRegister Table(VectorFormat vform, + LogicVRegister dst, const LogicVRegister& ind, bool zero_out_of_bounds, const LogicVRegister* tab1, @@ -2580,6 +3556,7 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src2); LogicVRegister uminmaxv(VectorFormat vform, LogicVRegister dst, + const LogicPRegister& pg, const LogicVRegister& src, bool max); LogicVRegister umaxv(VectorFormat vform, @@ -2617,11 +3594,27 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src, int shift); LogicVRegister scvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister scvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding rounding_mode); LogicVRegister ucvtf(VectorFormat vform, + unsigned dst_data_size, + unsigned src_data_size, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister ucvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, @@ -2706,9 +3699,9 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src); LogicVRegister extractnarrow(VectorFormat vform, LogicVRegister dst, - bool dstIsSigned, + bool dst_is_signed, const LogicVRegister& src, - bool srcIsSigned); + bool src_is_signed); LogicVRegister xtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); @@ -2725,7 
+3718,7 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, - bool issigned); + bool is_signed); LogicVRegister saba(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -2951,19 +3944,23 @@ class Simulator : public DecoderVisitor { template <typename T> LogicVRegister fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fmla(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); template <typename T> LogicVRegister fmls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fmls(VectorFormat vform, LogicVRegister dst, + const LogicVRegister& srca, const LogicVRegister& src1, const LogicVRegister& src2); LogicVRegister fnmul(VectorFormat vform, @@ -3023,6 +4020,31 @@ class Simulator : public DecoderVisitor { LogicVRegister frecpx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister ftsmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ftssel(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ftmad(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + unsigned index); + LogicVRegister fexpa(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + template <typename T> + LogicVRegister fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); template <typename T> LogicVRegister fabs_(VectorFormat vform, LogicVRegister dst, @@ -3034,19 +4056,40 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2); - LogicVRegister frint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, bool inexact_exception = false, FrintMode frint_mode = kFrintToInteger); + LogicVRegister fcvt(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister fcvts(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); LogicVRegister fcvts(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, int fbits = 0); LogicVRegister fcvtu(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister fcvtu(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, @@ -3086,16 +4129,78 @@ class Simulator : public DecoderVisitor { LogicVRegister dst, const LogicVRegister& src); + LogicPRegister pfalse(LogicPRegister dst); + LogicPRegister pfirst(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int 
pattern); + LogicPRegister pnext(VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + + LogicVRegister asrd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int shift); + + LogicVRegister andv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister eorv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister orv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister saddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister sminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister uaddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister uminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + template <typename T> - struct TFPMinMaxOp { + struct TFPPairOp { typedef T (Simulator::*type)(T a, T b); }; template <typename T> - LogicVRegister fminmaxv(VectorFormat vform, - LogicVRegister dst, - const LogicVRegister& src, - typename TFPMinMaxOp<T>::type Op); + LogicVRegister FPPairedAcrossHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<T>::type fn, + uint64_t inactive_value); + + LogicVRegister FPPairedAcrossHelper( + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp<vixl::internal::SimFloat16>::type fn16, + typename TFPPairOp<float>::type fn32, + typename TFPPairOp<double>::type fn64, + uint64_t inactive_value); LogicVRegister fminv(VectorFormat vform, LogicVRegister dst, @@ -3109,6 +4214,9 @@ class Simulator : public DecoderVisitor { LogicVRegister fmaxnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src); + LogicVRegister faddv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); static const uint32_t CRC32_POLY = 0x04C11DB7; static const uint32_t CRC32C_POLY = 0x1EDC6F41; @@ -3209,6 +4317,129 @@ class Simulator : public DecoderVisitor { void DoSaveCPUFeatures(const Instruction* instr); void DoRestoreCPUFeatures(const Instruction* instr); + // General arithmetic helpers ---------------------------- + + // Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or + // sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must + // be a valid int<n>_t. 
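
A concrete 8-bit case of the accumulate-saturate-extend behaviour described in the comment above (the IncDecN declaration follows below); this is a standalone sketch under assumed semantics, not the simulator's implementation:

    #include <cstdint>
    #include <cstdio>

    // Saturating variant for n == 8: sign-extend the low 8 bits of `acc`, add
    // the delta, clamp to the 8-bit signed range, then truncate back.
    static uint8_t IncDec8(uint8_t acc, int64_t delta, bool saturate) {
      int64_t sum = static_cast<int8_t>(acc) + delta;
      if (saturate) {
        if (sum > INT8_MAX) sum = INT8_MAX;
        if (sum < INT8_MIN) sum = INT8_MIN;
      }
      return static_cast<uint8_t>(sum);  // Wraps modulo 256 when not saturating.
    }

    int main() {
      // 120 + 16 = 136: clamps to 127 when saturating, wraps to -120 otherwise.
      std::printf("%d\n", static_cast<int8_t>(IncDec8(120, 16, true)));
      std::printf("%d\n", static_cast<int8_t>(IncDec8(120, 16, false)));
      return 0;
    }
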
+ uint64_t IncDecN(uint64_t acc, + int64_t delta, + unsigned n, + bool is_saturating = false, + bool is_signed = false); + + // SVE helpers ------------------------------------------- + LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op, + VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + const LogicVRegister& zm); + + LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, + LogicPRegister Pd, + const LogicPRegister& pn, + const LogicPRegister& pm); + + LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op, + VectorFormat vform, + LogicVRegister zd, + uint64_t imm); + enum UnpackType { kHiHalf, kLoHalf }; + enum ExtendType { kSignedExtend, kUnsignedExtend }; + LogicVRegister unpk(VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + UnpackType unpack_type, + ExtendType extend_type); + + LogicPRegister SVEIntCompareVectorsHelper(Condition cc, + VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& mask, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements = false, + FlagsUpdate flags = SetFlags); + + void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, + VectorFormat vform, + SVEOffsetModifier mod); + + // Store each active zt<i>[lane] to `addr.GetElementAddress(lane, ...)`. + // + // `zt_code` specifies the code of the first register (zt). Each additional + // register (up to `reg_count`) is `(zt_code + i) % 32`. + // + // This helper calls LogZWrite in the proper way, according to `addr`. + void SVEStructuredStoreHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr); + // Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`. + void SVEStructuredLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + bool is_signed = false); + + enum SVEFaultTolerantLoadType { + // - Elements active in both FFR and pg are accessed as usual. If the access + // fails, the corresponding lane and all subsequent lanes are filled with + // an unpredictable value, and made inactive in FFR. + // + // - Elements active in FFR but not pg are set to zero. + // + // - Elements that are not active in FFR are filled with an unpredictable + // value, regardless of pg. + kSVENonFaultLoad, + + // If type == kSVEFirstFaultLoad, the behaviour is the same, except that the + // first active element is always accessed, regardless of FFR, and will + // generate a real fault if it is inaccessible. If the lane is not active in + // FFR, the actual value loaded into the result is still unpredictable. + kSVEFirstFaultLoad + }; + + // Load with first-faulting or non-faulting load semantics, respecting and + // updating FFR. + void SVEFaultTolerantLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + SVEFaultTolerantLoadType type, + bool is_signed); + + LogicVRegister SVEBitwiseShiftHelper(Shift shift_op, + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements); + + template <typename T> + LogicVRegister FTMaddHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + uint64_t coeff_pos, + uint64_t coeff_neg); + + // Return the first or last active lane, or -1 if none are active. 
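
The first/last-active scans declared just below have a simple shape; over a plain boolean predicate they reduce to the following (standalone sketch, not the LogicPRegister-based code):

    #include <cstdio>

    static int GetFirstActive(const bool* pg, int lane_count) {
      for (int i = 0; i < lane_count; i++) {
        if (pg[i]) return i;
      }
      return -1;  // No active lanes.
    }

    static int GetLastActive(const bool* pg, int lane_count) {
      for (int i = lane_count - 1; i >= 0; i--) {
        if (pg[i]) return i;
      }
      return -1;
    }

    int main() {
      bool pg[] = {false, true, false, true};
      std::printf("%d %d\n", GetFirstActive(pg, 4), GetLastActive(pg, 4));  // 1 3
      return 0;
    }
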
+ int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const; + int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const; + + int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const; + + // Count active and true lanes in `pn`. + int CountActiveAndTrueLanes(VectorFormat vform, + const LogicPRegister& pg, + const LogicPRegister& pn) const; + + // Count the number of lanes referred to by `pattern`, given the vector + // length. If `pattern` is not a recognised SVEPredicateConstraint, this + // returns zero. + int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const; + // Simulate a runtime call. void DoRuntimeCall(const Instruction* instr); @@ -3222,15 +4453,21 @@ class Simulator : public DecoderVisitor { FILE* stream_; PrintDisassembler* print_disasm_; - // Instruction statistics instrumentation. - Instrument* instrumentation_; - // General purpose registers. Register 31 is the stack pointer. SimRegister registers_[kNumberOfRegisters]; // Vector registers SimVRegister vregisters_[kNumberOfVRegisters]; + // SVE predicate registers. + SimPRegister pregisters_[kNumberOfPRegisters]; + + // SVE first-fault register. + SimFFRRegister ffr_register_; + + // A pseudo SVE predicate register with all bits set to true. + SimPRegister pregister_all_true_; + // Program Status Register. // bits[31, 27]: Condition flags N, Z, C, and V. // (Negative, Zero, Carry, Overflow) @@ -3266,8 +4503,10 @@ class Simulator : public DecoderVisitor { // Stack byte* stack_; static const int stack_protection_size_ = 256; - // 2 KB stack. - static const int stack_size_ = 2 * 1024 + 2 * stack_protection_size_; + // 8 KB stack. + // TODO: Make this configurable, or automatically allocate space as it runs + // out (like the OS would try to do). + static const int stack_size_ = 8 * 1024 + 2 * stack_protection_size_; byte* stack_limit_; Decoder* decoder_; @@ -3276,6 +4515,10 @@ class Simulator : public DecoderVisitor { bool pc_modified_; const Instruction* pc_; + // If non-NULL, the last instruction was a movprfx, and validity needs to be + // checked. + Instruction const* movprfx_; + // Branch type register, used for branch target identification. BType btype_; @@ -3289,10 +4532,13 @@ class Simulator : public DecoderVisitor { static const char* xreg_names[]; static const char* wreg_names[]; + static const char* breg_names[]; static const char* hreg_names[]; static const char* sreg_names[]; static const char* dreg_names[]; static const char* vreg_names[]; + static const char* zreg_names[]; + static const char* preg_names[]; private: static const PACKey kPACKeyIA; @@ -3301,6 +4547,13 @@ class Simulator : public DecoderVisitor { static const PACKey kPACKeyDB; static const PACKey kPACKeyGA; + bool CanReadMemory(uintptr_t address, size_t size); + + // CanReadMemory needs dummy file descriptors, so we use a pipe. We can save + // some system call overhead by opening them on construction, rather than on + // every call to CanReadMemory. + int dummy_pipe_fd_[2]; + template <typename T> static T FPDefaultNaN(); @@ -3353,14 +4606,24 @@ class Simulator : public DecoderVisitor { } } + // Construct a SimVRegister from a SimPRegister, where each byte-sized lane of + // the destination is set to all true (0xff) when the corresponding + // predicate flag is set, and false (0x00) otherwise. + SimVRegister ExpandToSimVRegister(const SimPRegister& preg); + + // Set each predicate flag in pd where the corresponding assigned-sized lane + // in vreg is non-zero. Clear the flag, otherwise. 
This is almost the opposite + // operation to ExpandToSimVRegister(), except that any non-zero lane is + // interpreted as true. + void ExtractFromSimVRegister(VectorFormat vform, + SimPRegister& pd, // NOLINT(runtime/references) + SimVRegister vreg); + bool coloured_trace_; // A set of TraceParameters flags. int trace_parameters_; - // Indicates whether the instruction instrumentation is active. - bool instruction_stats_; - // Indicates whether the exclusive-access warning has been printed. bool print_exclusive_access_warning_; void PrintExclusiveAccessWarning(); @@ -3368,8 +4631,14 @@ class Simulator : public DecoderVisitor { CPUFeaturesAuditor cpu_features_auditor_; std::vector<CPUFeatures> saved_cpu_features_; - // The simulated state of RNDR and RNDRRS for generating a random number. - uint16_t rndr_state_[3]; + // State for *rand48 functions, used to simulate randomness with repeatable + // behaviour (so that tests are deterministic). This is used to simulate RNDR + // and RNDRRS, as well as to simulate a source of entropy for architecturally + // undefined behaviour. + uint16_t rand_state_[3]; + + // A configurable size of SVE vector registers. + unsigned vector_length_; }; #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L diff --git a/src/aarch64/simulator-constants-aarch64.h b/src/aarch64/simulator-constants-aarch64.h index 3256f30e..e2389f11 100644 --- a/src/aarch64/simulator-constants-aarch64.h +++ b/src/aarch64/simulator-constants-aarch64.h @@ -121,7 +121,7 @@ const unsigned kTraceLength = 3 * kInstructionSize; enum TraceParameters { LOG_DISASM = 1 << 0, // Log disassembly. LOG_REGS = 1 << 1, // Log general purpose registers. - LOG_VREGS = 1 << 2, // Log NEON and floating-point registers. + LOG_VREGS = 1 << 2, // Log SVE, NEON and floating-point registers. LOG_SYSREGS = 1 << 3, // Log the flags and system registers. LOG_WRITE = 1 << 4, // Log writes to memory. LOG_BRANCH = 1 << 5, // Log taken branches. diff --git a/src/cpu-features.cc b/src/cpu-features.cc index ea1e0d3e..08db3f44 100644 --- a/src/cpu-features.cc +++ b/src/cpu-features.cc @@ -37,31 +37,9 @@ namespace vixl { -static uint64_t MakeFeatureMask(CPUFeatures::Feature feature) { - if (feature == CPUFeatures::kNone) { - return 0; - } else { - // Check that the shift is well-defined, and that the feature is valid. - VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures <= - (sizeof(uint64_t) * 8)); - VIXL_ASSERT(feature < CPUFeatures::kNumberOfFeatures); - return UINT64_C(1) << feature; - } -} - -CPUFeatures::CPUFeatures(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) - : features_(0) { - Combine(feature0, feature1, feature2, feature3); -} - CPUFeatures CPUFeatures::All() { CPUFeatures all; - // Check that the shift is well-defined. 
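
The uint64_t feature mask manipulated by the code being deleted here is replaced by std::bitset throughout (see the cpu-features.h hunk below). In isolation, the bitset operations the new implementation leans on:

    #include <bitset>
    #include <cstdio>

    int main() {
      std::bitset<8> features;                 // Value-initialised: all clear.
      features.set(3);                         // Combine one feature.
      std::printf("%zu\n", features.count());  // 1
      features.set();                          // Like CPUFeatures::All().
      std::printf("%zu\n", features.count());  // 8
      features.reset(3);                       // Remove one feature.
      std::printf("%d\n", static_cast<int>(features.test(3)));  // 0
      return 0;
    }
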
- VIXL_STATIC_ASSERT(CPUFeatures::kNumberOfFeatures < (sizeof(uint64_t) * 8)); - all.features_ = (UINT64_C(1) << kNumberOfFeatures) - 1; + all.features_.set(); return all; } @@ -89,74 +67,27 @@ void CPUFeatures::Combine(const CPUFeatures& other) { features_ |= other.features_; } -void CPUFeatures::Combine(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) { - features_ |= MakeFeatureMask(feature0); - features_ |= MakeFeatureMask(feature1); - features_ |= MakeFeatureMask(feature2); - features_ |= MakeFeatureMask(feature3); +void CPUFeatures::Combine(Feature feature) { + if (feature != CPUFeatures::kNone) features_.set(feature); } void CPUFeatures::Remove(const CPUFeatures& other) { features_ &= ~other.features_; } -void CPUFeatures::Remove(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) { - features_ &= ~MakeFeatureMask(feature0); - features_ &= ~MakeFeatureMask(feature1); - features_ &= ~MakeFeatureMask(feature2); - features_ &= ~MakeFeatureMask(feature3); -} - -CPUFeatures CPUFeatures::With(const CPUFeatures& other) const { - CPUFeatures f(*this); - f.Combine(other); - return f; -} - -CPUFeatures CPUFeatures::With(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) const { - CPUFeatures f(*this); - f.Combine(feature0, feature1, feature2, feature3); - return f; -} - -CPUFeatures CPUFeatures::Without(const CPUFeatures& other) const { - CPUFeatures f(*this); - f.Remove(other); - return f; -} - -CPUFeatures CPUFeatures::Without(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) const { - CPUFeatures f(*this); - f.Remove(feature0, feature1, feature2, feature3); - return f; +void CPUFeatures::Remove(Feature feature) { + if (feature != CPUFeatures::kNone) features_.reset(feature); } bool CPUFeatures::Has(const CPUFeatures& other) const { return (features_ & other.features_) == other.features_; } -bool CPUFeatures::Has(Feature feature0, - Feature feature1, - Feature feature2, - Feature feature3) const { - uint64_t mask = MakeFeatureMask(feature0) | MakeFeatureMask(feature1) | - MakeFeatureMask(feature2) | MakeFeatureMask(feature3); - return (features_ & mask) == mask; +bool CPUFeatures::Has(Feature feature) const { + return (feature == CPUFeatures::kNone) || features_[feature]; } -size_t CPUFeatures::Count() const { return CountSetBits(features_); } +size_t CPUFeatures::Count() const { return features_.count(); } std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) { // clang-format off @@ -177,12 +108,9 @@ VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE) } CPUFeatures::const_iterator CPUFeatures::begin() const { - if (features_ == 0) return const_iterator(this, kNone); - - int feature_number = CountTrailingZeros(features_); - vixl::CPUFeatures::Feature feature = - static_cast<CPUFeatures::Feature>(feature_number); - return const_iterator(this, feature); + // For iterators in general, it's undefined to increment `end()`, but here we + // control the implementation and it is safe to do this. 
+ return ++end(); } CPUFeatures::const_iterator CPUFeatures::end() const { @@ -190,11 +118,11 @@ CPUFeatures::const_iterator CPUFeatures::end() const { } std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) { - CPUFeatures::const_iterator it = features.begin(); - while (it != features.end()) { - os << *it; - ++it; - if (it != features.end()) os << ", "; + bool need_separator = false; + for (CPUFeatures::Feature feature : features) { + if (need_separator) os << ", "; + need_separator = true; + os << feature; } return os; } @@ -205,7 +133,7 @@ bool CPUFeaturesConstIterator::operator==( return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_); } -CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix +CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() { // Prefix VIXL_ASSERT(IsValid()); do { // Find the next feature. The order is unspecified. @@ -219,11 +147,11 @@ CPUFeatures::Feature CPUFeaturesConstIterator::operator++() { // Prefix // cpu_features_->Has(kNone) is always true, so this will terminate even if // the features list is empty. } while (!cpu_features_->Has(feature_)); - return feature_; + return *this; } -CPUFeatures::Feature CPUFeaturesConstIterator::operator++(int) { // Postfix - CPUFeatures::Feature result = feature_; +CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) { // Postfix + CPUFeaturesConstIterator result = *this; ++(*this); return result; } diff --git a/src/cpu-features.h b/src/cpu-features.h index 50ddc267..1b0f2c24 100644 --- a/src/cpu-features.h +++ b/src/cpu-features.h @@ -27,6 +27,7 @@ #ifndef VIXL_CPU_FEATURES_H #define VIXL_CPU_FEATURES_H +#include <bitset> #include <ostream> #include "globals-vixl.h" @@ -34,16 +35,65 @@ namespace vixl { +// VIXL aims to handle and detect all architectural features that are likely to +// influence code-generation decisions at EL0 (user-space). +// +// - There may be multiple VIXL feature flags for a given architectural +// extension. This occurs where the extension allows components to be +// implemented independently, or where kernel support is needed, and is likely +// to be fragmented. +// +// For example, Pointer Authentication (kPAuth*) has a separate feature flag +// for access to PACGA, and to indicate that the QARMA algorithm is +// implemented. +// +// - Conversely, some extensions have configuration options that do not affect +// EL0, so these are presented as a single VIXL feature. +// +// For example, the RAS extension (kRAS) has several variants, but the only +// feature relevant to VIXL is the addition of the ESB instruction so we only +// need a single flag. +// +// - VIXL offers separate flags for separate features even if they're +// architecturally linked. +// +// For example, the architecture requires kFPHalf and kNEONHalf to be equal, +// but they have separate hardware ID register fields so VIXL presents them as +// separate features. +// +// - VIXL can detect every feature for which it can generate code. +// +// - VIXL can detect some features for which it cannot generate code. +// +// The CPUFeatures::Feature enum (derived from the macro list below) is +// frequently extended. New features may be added to the list at any point, and +// no assumptions should be made about the numerical values assigned to each +// enum constant. The symbolic names can be considered to be stable. +// +// The debug descriptions are used only for debug output.
The 'cpuinfo' strings +// are informative; VIXL does not use /proc/cpuinfo for feature detection. + // clang-format off #define VIXL_CPU_FEATURE_LIST(V) \ /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \ /* registers, so that the detailed feature registers can be read */ \ /* directly. */ \ + \ + /* Constant name Debug description Linux 'cpuinfo' string. */ \ V(kIDRegisterEmulation, "ID register emulation", "cpuid") \ \ V(kFP, "FP", "fp") \ V(kNEON, "NEON", "asimd") \ V(kCRC32, "CRC32", "crc32") \ + V(kDGH, "DGH", "dgh") \ + /* Speculation control features. */ \ + V(kCSV2, "CSV2", NULL) \ + V(kSCXTNUM, "SCXTNUM", NULL) \ + V(kCSV3, "CSV3", NULL) \ + V(kSB, "SB", "sb") \ + V(kSPECRES, "SPECRES", NULL) \ + V(kSSBS, "SSBS", NULL) \ + V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \ /* Cryptographic support instructions. */ \ V(kAES, "AES", "aes") \ V(kSHA1, "SHA1", "sha1") \ @@ -58,28 +108,36 @@ namespace vixl { V(kRDM, "RDM", "asimdrdm") \ /* Scalable Vector Extension. */ \ V(kSVE, "SVE", "sve") \ + V(kSVEF64MM, "SVE F64MM", "svef64mm") \ + V(kSVEF32MM, "SVE F32MM", "svef32mm") \ + V(kSVEI8MM, "SVE I8MM", "svei8imm") \ + V(kSVEBF16, "SVE BFloat16", "svebf16") \ /* SDOT and UDOT support (in NEON). */ \ V(kDotProduct, "DotProduct", "asimddp") \ + /* Int8 matrix multiplication (in NEON). */ \ + V(kI8MM, "NEON I8MM", "i8mm") \ /* Half-precision (FP16) support for FP and NEON, respectively. */ \ V(kFPHalf, "FPHalf", "fphp") \ V(kNEONHalf, "NEONHalf", "asimdhp") \ + /* BFloat16 support (in both FP and NEON.) */ \ + V(kBF16, "FP/NEON BFloat 16", "bf16") \ /* The RAS extension, including the ESB instruction. */ \ V(kRAS, "RAS", NULL) \ /* Data cache clean to the point of persistence: DC CVAP. */ \ V(kDCPoP, "DCPoP", "dcpop") \ /* Data cache clean to the point of deep persistence: DC CVADP. */ \ - V(kDCCVADP, "DCCVADP", NULL) \ + V(kDCCVADP, "DCCVADP", "dcpodp") \ /* Cryptographic support instructions. */ \ V(kSHA3, "SHA3", "sha3") \ V(kSHA512, "SHA512", "sha512") \ V(kSM3, "SM3", "sm3") \ V(kSM4, "SM4", "sm4") \ /* Pointer authentication for addresses. */ \ - V(kPAuth, "PAuth", NULL) \ + V(kPAuth, "PAuth", "paca") \ /* Pointer authentication for addresses uses QARMA. */ \ V(kPAuthQARMA, "PAuthQARMA", NULL) \ /* Generic authentication (using the PACGA instruction). */ \ - V(kPAuthGeneric, "PAuthGeneric", NULL) \ + V(kPAuthGeneric, "PAuthGeneric", "pacg") \ /* Generic authentication uses QARMA. */ \ V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \ /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \ @@ -98,13 +156,21 @@ namespace vixl { /* Data-independent timing (for selected instructions). */ \ V(kDIT, "DIT", "dit") \ /* Branch target identification. */ \ - V(kBTI, "BTI", NULL) \ + V(kBTI, "BTI", "bti") \ /* Flag manipulation instructions: {AX,XA}FLAG */ \ - V(kAXFlag, "AXFlag", NULL) \ + V(kAXFlag, "AXFlag", "flagm2") \ /* Random number generation extension, */ \ - V(kRNG, "RNG", NULL) \ + V(kRNG, "RNG", "rng") \ /* Floating-point round to {32,64}-bit integer. */ \ - V(kFrintToFixedSizedInt,"Frint (bounded)", NULL) + V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \ + /* Memory Tagging Extension. */ \ + V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \ + V(kMTE, "MTE", NULL) \ + /* PAuth extensions. 
*/ \ + V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \ + V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \ + V(kPAuthFPAC, "PAuth FPAC", NULL) \ + V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) // clang-format on @@ -197,13 +263,13 @@ class CPUFeatures { // clang-format on // By default, construct with no features enabled. - CPUFeatures() : features_(0) {} + CPUFeatures() : features_{} {} // Construct with some features already enabled. - CPUFeatures(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone); + template <typename T, typename... U> + CPUFeatures(T first, U... others) : features_{} { + Combine(first, others...); + } // Construct with all features enabled. This can be used to disable feature // checking: `Has(...)` returns true regardless of the argument. @@ -236,41 +302,59 @@ class CPUFeatures { // exist in this set are left unchanged. void Combine(const CPUFeatures& other); - // Combine specific features into this set. Features that already exist in - // this set are left unchanged. - void Combine(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone); + // Combine a specific feature into this set. If it already exists in the set, + // the set is left unchanged. + void Combine(Feature feature); + + // Combine multiple features (or feature sets) into this set. + template <typename T, typename... U> + void Combine(T first, U... others) { + Combine(first); + Combine(others...); + } // Remove features in another CPUFeatures object from this one. void Remove(const CPUFeatures& other); - // Remove specific features from this set. - void Remove(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone); - - // Chaining helpers for convenient construction. - CPUFeatures With(const CPUFeatures& other) const; - CPUFeatures With(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone) const; - CPUFeatures Without(const CPUFeatures& other) const; - CPUFeatures Without(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone) const; - - // Query features. - // Note that an empty query (like `Has(kNone)`) always returns true. + // Remove a specific feature from this set. This has no effect if the feature + // doesn't exist in the set. + void Remove(Feature feature0); + + // Remove multiple features (or feature sets) from this set. + template <typename T, typename... U> + void Remove(T first, U... others) { + Remove(first); + Remove(others...); + } + + // Chaining helpers for convenient construction by combining other CPUFeatures + // or individual Features. + template <typename... T> + CPUFeatures With(T... others) const { + CPUFeatures f(*this); + f.Combine(others...); + return f; + } + + template <typename... T> + CPUFeatures Without(T... others) const { + CPUFeatures f(*this); + f.Remove(others...); + return f; + } + + // Test whether the `other` feature set is equal to or a subset of this one. bool Has(const CPUFeatures& other) const; - bool Has(Feature feature0, - Feature feature1 = kNone, - Feature feature2 = kNone, - Feature feature3 = kNone) const; + + // Test whether a single feature exists in this set. + // Note that `Has(kNone)` always returns true. + bool Has(Feature feature) const; + + // Test whether all of the specified features exist in this set. + template <typename T, typename... U> + bool Has(T first, U... 
others) const { + return Has(first) && Has(others...); + } // Return the number of enabled features. size_t Count() const; @@ -288,9 +372,8 @@ class CPUFeatures { const_iterator end() const; private: - // Each bit represents a feature. This field will be replaced as needed if - // features are added. - uint64_t features_; + // Each bit represents a feature. This set will be extended as needed. + std::bitset<kNumberOfFeatures> features_; friend std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features); @@ -313,8 +396,8 @@ class CPUFeaturesConstIterator { bool operator!=(const CPUFeaturesConstIterator& other) const { return !(*this == other); } - CPUFeatures::Feature operator++(); - CPUFeatures::Feature operator++(int); + CPUFeaturesConstIterator& operator++(); + CPUFeaturesConstIterator operator++(int); CPUFeatures::Feature operator*() const { VIXL_ASSERT(IsValid()); @@ -359,21 +442,17 @@ class CPUFeaturesScope { // Start a CPUFeaturesScope on any object that implements // `CPUFeatures* GetCPUFeatures()`. template <typename T> - explicit CPUFeaturesScope(T* cpu_features_wrapper, - CPUFeatures::Feature feature0 = CPUFeatures::kNone, - CPUFeatures::Feature feature1 = CPUFeatures::kNone, - CPUFeatures::Feature feature2 = CPUFeatures::kNone, - CPUFeatures::Feature feature3 = CPUFeatures::kNone) + explicit CPUFeaturesScope(T* cpu_features_wrapper) : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), - old_features_(*cpu_features_) { - cpu_features_->Combine(feature0, feature1, feature2, feature3); - } + old_features_(*cpu_features_) {} - template <typename T> - CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other) + // Start a CPUFeaturesScope on any object that implements + // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled. + template <typename T, typename U, typename... V> + CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features) : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), old_features_(*cpu_features_) { - cpu_features_->Combine(other); + cpu_features_->Combine(first, features...); } ~CPUFeaturesScope() { *cpu_features_ = old_features_; } diff --git a/src/globals-vixl.h b/src/globals-vixl.h index 640b4b9b..4dc8c024 100644 --- a/src/globals-vixl.h +++ b/src/globals-vixl.h @@ -27,6 +27,10 @@ #ifndef VIXL_GLOBALS_H #define VIXL_GLOBALS_H +#if __cplusplus < 201402L +#error VIXL requires C++14 +#endif + // Get standard C99 macros for integer types. 
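
The C++14 floor declared just above is what enables the variadic template interfaces added to cpu-features.h earlier in this patch. For illustration, calling code can now pass any number of features in one go (a sketch that assumes the patched vixl headers are available):

    #include "cpu-features.h"

    using vixl::CPUFeatures;

    int main() {
      // Construction, Combine() and Has() are no longer capped at four features.
      CPUFeatures f(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kCRC32,
                    CPUFeatures::kAES, CPUFeatures::kSHA1, CPUFeatures::kRDM);
      f.Combine(CPUFeatures::kSVE, CPUFeatures::kSVEBF16);
      return f.Has(CPUFeatures::kFP, CPUFeatures::kSVE) ? 0 : 1;
    }
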
#ifndef __STDC_CONSTANT_MACROS #define __STDC_CONSTANT_MACROS @@ -66,7 +70,8 @@ typedef uint8_t byte; const int KBytes = 1024; const int MBytes = 1024 * KBytes; -const int kBitsPerByte = 8; +const int kBitsPerByteLog2 = 3; +const int kBitsPerByte = 1 << kBitsPerByteLog2; template <int SizeInBits> struct Unsigned; @@ -223,8 +228,11 @@ inline void USE(const T1&, const T2&, const T3&, const T4&) {} #if __cplusplus >= 201103L #define VIXL_OVERRIDE override +#define VIXL_CONSTEXPR constexpr +#define VIXL_HAS_CONSTEXPR 1 #else #define VIXL_OVERRIDE +#define VIXL_CONSTEXPR #endif // With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw diff --git a/src/invalset-vixl.h b/src/invalset-vixl.h index fbfb6a01..8bd6035e 100644 --- a/src/invalset-vixl.h +++ b/src/invalset-vixl.h @@ -842,9 +842,7 @@ InvalSetIterator<S>::InvalSetIterator(const InvalSetIterator<S>& other) #if __cplusplus >= 201103L template <class S> InvalSetIterator<S>::InvalSetIterator(InvalSetIterator<S>&& other) noexcept - : using_vector_(false), - index_(0), - inval_set_(NULL) { + : using_vector_(false), index_(0), inval_set_(NULL) { swap(*this, other); } #endif diff --git a/src/pool-manager-impl.h b/src/pool-manager-impl.h index 66ecd6a4..a1bcaaad 100644 --- a/src/pool-manager-impl.h +++ b/src/pool-manager-impl.h @@ -264,14 +264,14 @@ bool PoolManager<T>::MustEmit(T pc, if (checkpoint < temp.min_location_) return true; } - bool tempNotPlacedYet = true; + bool temp_not_placed_yet = true; for (int i = static_cast<int>(objects_.size()) - 1; i >= 0; --i) { const PoolObject<T>& current = objects_[i]; - if (tempNotPlacedYet && PoolObjectLessThan(current, temp)) { + if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) { checkpoint = UpdateCheckpointForObject(checkpoint, &temp); if (checkpoint < temp.min_location_) return true; if (CheckFuturePC(pc, checkpoint)) return true; - tempNotPlacedYet = false; + temp_not_placed_yet = false; } if (current.label_base_ == label_base) continue; checkpoint = UpdateCheckpointForObject(checkpoint, ¤t); @@ -279,7 +279,7 @@ bool PoolManager<T>::MustEmit(T pc, if (CheckFuturePC(pc, checkpoint)) return true; } // temp is the object with the smallest max_location_. - if (tempNotPlacedYet) { + if (temp_not_placed_yet) { checkpoint = UpdateCheckpointForObject(checkpoint, &temp); if (checkpoint < temp.min_location_) return true; } @@ -497,7 +497,7 @@ PoolManager<T>::~PoolManager<T>() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { } #endif // Delete objects the pool manager owns. - for (typename std::vector<LocationBase<T> *>::iterator + for (typename std::vector<LocationBase<T>*>::iterator iter = delete_on_destruction_.begin(), end = delete_on_destruction_.end(); iter != end; diff --git a/src/utils-vixl.h b/src/utils-vixl.h index c9287e40..0ae6dfc0 100644 --- a/src/utils-vixl.h +++ b/src/utils-vixl.h @@ -67,7 +67,7 @@ namespace vixl { #endif template <typename T, size_t n> -size_t ArrayLength(const T (&)[n]) { +constexpr size_t ArrayLength(const T (&)[n]) { return n; } @@ -77,25 +77,30 @@ inline uint64_t GetUintMask(unsigned bits) { return base - 1; } +inline uint64_t GetSignMask(unsigned bits) { + VIXL_ASSERT(bits <= 64); + return UINT64_C(1) << (bits - 1); +} + // Check number width. // TODO: Refactor these using templates. 
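
The IsIntN/IsUintN predicates that follow are relaxed so that n may equal the full word width. Their intended meaning is easy to pin down with a small reference implementation and a few boundary checks (standalone sketch, independent of the vixl headers):

    #include <cassert>
    #include <cstdint>

    // Reference semantics: true iff x fits in an n-bit two's-complement integer.
    static bool FitsIntN(unsigned n, int64_t x) {
      if (n == 64) return true;  // Every int64_t fits in 64 bits.
      int64_t limit = INT64_C(1) << (n - 1);
      return (-limit <= x) && (x < limit);
    }

    int main() {
      assert(FitsIntN(8, 127));         // INT8_MAX fits.
      assert(!FitsIntN(8, 128));        // One past INT8_MAX does not.
      assert(FitsIntN(8, -128));        // INT8_MIN fits.
      assert(FitsIntN(64, INT64_MIN));  // n == 64 is now permitted.
      return 0;
    }
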
inline bool IsIntN(unsigned n, uint32_t x) { - VIXL_ASSERT((0 < n) && (n < 32)); - uint32_t limit = UINT32_C(1) << (n - 1); - return x < limit; + VIXL_ASSERT((0 < n) && (n <= 32)); + return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n)); } inline bool IsIntN(unsigned n, int32_t x) { - VIXL_ASSERT((0 < n) && (n < 32)); + VIXL_ASSERT((0 < n) && (n <= 32)); + if (n == 32) return true; int32_t limit = INT32_C(1) << (n - 1); return (-limit <= x) && (x < limit); } inline bool IsIntN(unsigned n, uint64_t x) { - VIXL_ASSERT((0 < n) && (n < 64)); - uint64_t limit = UINT64_C(1) << (n - 1); - return x < limit; + VIXL_ASSERT((0 < n) && (n <= 64)); + return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n)); } inline bool IsIntN(unsigned n, int64_t x) { - VIXL_ASSERT((0 < n) && (n < 64)); + VIXL_ASSERT((0 < n) && (n <= 64)); + if (n == 64) return true; int64_t limit = INT64_C(1) << (n - 1); return (-limit <= x) && (x < limit); } @@ -104,7 +109,8 @@ VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) { } inline bool IsUintN(unsigned n, uint32_t x) { - VIXL_ASSERT((0 < n) && (n < 32)); + VIXL_ASSERT((0 < n) && (n <= 32)); + if (n >= 32) return true; return !(x >> n); } inline bool IsUintN(unsigned n, int32_t x) { @@ -113,7 +119,8 @@ inline bool IsUintN(unsigned n, int32_t x) { return !(static_cast<uint32_t>(x) >> n); } inline bool IsUintN(unsigned n, uint64_t x) { - VIXL_ASSERT((0 < n) && (n < 64)); + VIXL_ASSERT((0 < n) && (n <= 64)); + if (n >= 64) return true; return !(x >> n); } inline bool IsUintN(unsigned n, int64_t x) { @@ -189,7 +196,7 @@ inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) { } -inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) { +inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) { VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && (msb >= lsb)); return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x)); @@ -209,8 +216,7 @@ inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) { return result; } - -inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint32_t x) { +inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) { VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) && (msb >= lsb)); uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x)); @@ -219,7 +225,6 @@ inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint32_t x) { return result; } - inline uint64_t RotateRight(uint64_t value, unsigned int rotate, unsigned int width) { @@ -277,6 +282,19 @@ VIXL_DEPRECATED("RawbitsToDouble", return RawbitsToDouble(bits); } +// Convert unsigned to signed numbers in a well-defined way (using two's +// complement representations). +inline int64_t RawbitsToInt64(uint64_t bits) { + return (bits >= UINT64_C(0x8000000000000000)) + ? (-static_cast<int64_t>(-bits - 1) - 1) + : static_cast<int64_t>(bits); +} + +inline int32_t RawbitsToInt32(uint32_t bits) { + return (bits >= UINT64_C(0x80000000)) ? 
@@ -277,6 +282,19 @@ VIXL_DEPRECATED("RawbitsToDouble",
   return RawbitsToDouble(bits);
 }
 
+// Convert unsigned to signed numbers in a well-defined way (using two's
+// complement representations).
+inline int64_t RawbitsToInt64(uint64_t bits) {
+  return (bits >= UINT64_C(0x8000000000000000))
+             ? (-static_cast<int64_t>(-bits - 1) - 1)
+             : static_cast<int64_t>(bits);
+}
+
+inline int32_t RawbitsToInt32(uint32_t bits) {
+  return (bits >= UINT64_C(0x80000000)) ? (-static_cast<int32_t>(-bits - 1) - 1)
+                                        : static_cast<int32_t>(bits);
+}
+
 namespace internal {
 
 // Internal simulation class used solely by the simulator to
@@ -371,6 +389,10 @@ VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
 
 bool IsZero(Float16 value);
 
+inline bool IsPositiveZero(double value) {
+  return (value == 0.0) && (copysign(1.0, value) > 0.0);
+}
+
 inline bool IsNaN(float value) { return std::isnan(value); }
 
 inline bool IsNaN(double value) { return std::isnan(value); }
@@ -490,11 +512,11 @@ T ReverseBits(T value) {
 
 
 template <typename T>
-inline T SignExtend(T val, int bitSize) {
-  VIXL_ASSERT(bitSize > 0);
-  T mask = (T(2) << (bitSize - 1)) - T(1);
+inline T SignExtend(T val, int size_in_bits) {
+  VIXL_ASSERT(size_in_bits > 0);
+  T mask = (T(2) << (size_in_bits - 1)) - T(1);
   val &= mask;
-  T sign_bits = -((val >> (bitSize - 1)) << bitSize);
+  T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
   val |= sign_bits;
   return val;
 }
@@ -576,7 +598,7 @@ T AlignUp(T pointer,
   // reinterpret_cast behaviour for other types.
 
   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
-      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+      (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t mask = alignment - 1;
@@ -596,7 +618,7 @@ T AlignDown(T pointer,
   // reinterpret_cast behaviour for other types.
 
   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
-      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
+      (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
 
   size_t mask = alignment - 1;
@@ -980,6 +1002,42 @@ Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}
 
 Int64 BitCount(Uint32 value);
 
+// The algorithm used is adapted from the one described in section 8.2 of
+// Hacker's Delight, by Henry S. Warren, Jr.
+template <unsigned N, typename T>
+int64_t MultiplyHigh(T u, T v) {
+  uint64_t u0, v0, w0, u1, v1, w1, w2, t;
+  VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
+  uint64_t sign_mask = UINT64_C(1) << (N - 1);
+  uint64_t sign_ext = 0;
+  unsigned half_bits = N / 2;
+  uint64_t half_mask = GetUintMask(half_bits);
+  if (std::numeric_limits<T>::is_signed) {
+    sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
+  }
+
+  VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
+  VIXL_ASSERT(sizeof(u) == sizeof(u0));
+
+  u0 = u & half_mask;
+  u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
+  v0 = v & half_mask;
+  v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);
+
+  w0 = u0 * v0;
+  t = u1 * v0 + (w0 >> half_bits);
+
+  w1 = t & half_mask;
+  w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
+  w1 = u0 * v1 + w1;
+  w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);
+
+  uint64_t value = u1 * v1 + w2 + w1;
+  int64_t result;
+  memcpy(&result, &value, sizeof(result));
+  return result;
+}
+
 }  // namespace internal
 
 // The default NaN values (for FPCR.DN=1).
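Note: MultiplyHigh returns the high N bits of the 2N-bit product, the quantity that high-multiply instructions such as SMULH compute, by splitting each operand into halves so no type wider than uint64_t is needed. For N == 32 the result can be cross-checked against a plain widening multiply; a small test sketch, not part of the patch:

#include <cassert>
#include <cstdint>

// Reference for the N == 32 signed case: widen, multiply, keep the top half.
int32_t MultiplyHigh32Reference(int32_t u, int32_t v) {
  int64_t product = static_cast<int64_t>(u) * static_cast<int64_t>(v);
  return static_cast<int32_t>(product >> 32);
}

int main() {
  assert(MultiplyHigh32Reference(INT32_MAX, INT32_MAX) == 0x3fffffff);
  assert(MultiplyHigh32Reference(INT32_MIN, INT32_MIN) == 0x40000000);
  assert(MultiplyHigh32Reference(-1, 1) == -1);  // High half of -1 is all ones.
  return 0;
}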
@@ -1244,9 +1302,8 @@ inline Float16 FPRoundToFloat16(int64_t sign,
                                 uint64_t mantissa,
                                 FPRounding round_mode) {
   return RawbitsToFloat16(
-      FPRound<uint16_t,
-              kFloat16ExponentBits,
-              kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
+      FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
+          sign, exponent, mantissa, round_mode));
 }
 
 
@@ -1282,6 +1339,62 @@ Float16 FPToFloat16(double value,
                     FPRounding round_mode,
                     UseDefaultNaN DN,
                     bool* exception = NULL);
+
+// Like static_cast<T>(value), but with specialisations for the Float16 type.
+template <typename T, typename F>
+T StaticCastFPTo(F value) {
+  return static_cast<T>(value);
+}
+
+template <>
+inline float StaticCastFPTo<float, Float16>(Float16 value) {
+  return FPToFloat(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline double StaticCastFPTo<double, Float16>(Float16 value) {
+  return FPToDouble(value, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, float>(float value) {
+  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <>
+inline Float16 StaticCastFPTo<Float16, double>(double value) {
+  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
+}
+
+template <typename T>
+uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
+  switch (size_in_bits) {
+    case 16:
+      return Float16ToRawbits(StaticCastFPTo<Float16>(value));
+    case 32:
+      return FloatToRawbits(StaticCastFPTo<float>(value));
+    case 64:
+      return DoubleToRawbits(StaticCastFPTo<double>(value));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+template <typename T>
+T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
+  VIXL_ASSERT(IsUintN(size_in_bits, value));
+  switch (size_in_bits) {
+    case 16:
+      return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
+    case 32:
+      return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
+    case 64:
+      return StaticCastFPTo<T>(RawbitsToDouble(value));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
 }  // namespace vixl
 
 #endif  // VIXL_UTILS_H
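Note: FPToRawbitsWithSize and RawbitsWithSizeToFP give callers one uint64_t-shaped channel for FP values of any lane size, with StaticCastFPTo covering the Float16 conversions that a plain static_cast cannot perform. A hedged usage sketch; the include path is an assumption, everything else is declared in the hunk above. Note also that RawbitsWithSizeToFP's assertion relies on the relaxed IsUintN accepting n == 64.

#include <cassert>
#include <cstdint>
#include "utils-vixl.h"  // Assumed path to the helpers in this patch.

int main() {
  // Round-trip a double through its raw 64-bit IEEE 754 encoding.
  uint64_t raw = vixl::FPToRawbitsWithSize(64, 1.5);
  assert(raw == UINT64_C(0x3ff8000000000000));  // Encoding of 1.5.
  assert(vixl::RawbitsWithSizeToFP<double>(64, raw) == 1.5);

  // Narrow a float to half precision and read it back as a double; 1.5 is
  // exactly representable in 16 bits, so the value survives the trip.
  uint64_t half = vixl::FPToRawbitsWithSize(16, 1.5f);
  assert(vixl::RawbitsWithSizeToFP<double>(16, half) == 1.5);
  return 0;
}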