aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-07-07 05:18:23 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-07-07 05:18:23 +0000
commit95596860196fd998a07724598c875186cb18e708 (patch)
treef699330c2958363bba7dd027f7bba2c1b9de3a55
parent0cb2a584727d043ff12da67f185e21b9a852c731 (diff)
parent1aa43263531a06caef740a97373eead33b2ac2f7 (diff)
downloadvixl-android14-mainline-uwb-release.tar.gz
Change-Id: I7f86314b52728f41a23bf22c4e7178fc36e1396c
-rw-r--r--.clang-tidy1
-rw-r--r--.gitreview1
-rw-r--r--Android.bp27
-rw-r--r--README.md63
-rw-r--r--SConstruct18
-rw-r--r--doc/aarch64/supported-instructions-aarch64.md6371
-rw-r--r--doc/changelog.md124
-rw-r--r--examples/aarch32/custom-aarch32-disasm.cc4
-rw-r--r--examples/aarch64/custom-disassembler.cc21
-rw-r--r--examples/aarch64/custom-disassembler.h4
-rw-r--r--examples/aarch64/executable-memory.h88
-rw-r--r--examples/aarch64/getting-started.cc31
-rw-r--r--examples/aarch64/non-const-visitor.cc36
-rw-r--r--examples/aarch64/non-const-visitor.h9
-rw-r--r--src/aarch32/assembler-aarch32.cc668
-rw-r--r--src/aarch32/instructions-aarch32.cc6
-rw-r--r--src/aarch32/instructions-aarch32.h4
-rw-r--r--src/aarch32/macro-assembler-aarch32.h69
-rw-r--r--src/aarch64/assembler-aarch64.cc119
-rw-r--r--src/aarch64/assembler-aarch64.h1129
-rw-r--r--src/aarch64/assembler-sve-aarch64.cc3758
-rw-r--r--src/aarch64/constants-aarch64.h10
-rw-r--r--src/aarch64/cpu-aarch64.cc56
-rw-r--r--src/aarch64/cpu-aarch64.h28
-rw-r--r--src/aarch64/cpu-features-auditor-aarch64.cc388
-rw-r--r--src/aarch64/cpu-features-auditor-aarch64.h19
-rw-r--r--src/aarch64/decoder-aarch64.cc555
-rw-r--r--src/aarch64/decoder-aarch64.h203
-rw-r--r--src/aarch64/decoder-constants-aarch64.h9794
-rw-r--r--src/aarch64/decoder-visitor-map-aarch64.h2973
-rw-r--r--src/aarch64/disasm-aarch64.cc9308
-rw-r--r--src/aarch64/disasm-aarch64.h127
-rw-r--r--src/aarch64/instructions-aarch64.cc957
-rw-r--r--src/aarch64/instructions-aarch64.h89
-rw-r--r--src/aarch64/logic-aarch64.cc1417
-rw-r--r--src/aarch64/macro-assembler-aarch64.cc65
-rw-r--r--src/aarch64/macro-assembler-aarch64.h1231
-rw-r--r--src/aarch64/macro-assembler-sve-aarch64.cc953
-rw-r--r--src/aarch64/operands-aarch64.cc13
-rw-r--r--src/aarch64/operands-aarch64.h23
-rw-r--r--src/aarch64/simulator-aarch64.cc2947
-rw-r--r--src/aarch64/simulator-aarch64.h400
-rw-r--r--src/cpu-features.h15
-rw-r--r--src/utils-vixl.h19
-rw-r--r--test/aarch32/test-assembler-aarch32.cc120
-rw-r--r--test/aarch32/test-disasm-a32.cc109
-rw-r--r--test/aarch64/test-api-movprfx-aarch64.cc1889
-rw-r--r--test/aarch64/test-assembler-aarch64.cc8
-rw-r--r--test/aarch64/test-assembler-aarch64.h18
-rw-r--r--test/aarch64/test-assembler-fp-aarch64.cc456
-rw-r--r--test/aarch64/test-assembler-neon-aarch64.cc1389
-rw-r--r--test/aarch64/test-assembler-sve-aarch64.cc1472
-rw-r--r--test/aarch64/test-disasm-aarch64.cc657
-rw-r--r--test/aarch64/test-disasm-aarch64.h7
-rw-r--r--test/aarch64/test-disasm-neon-aarch64.cc343
-rw-r--r--test/aarch64/test-disasm-sve-aarch64.cc9338
-rw-r--r--test/aarch64/test-simulator-sve-aarch64.cc271
-rw-r--r--test/aarch64/test-simulator-sve2-aarch64.cc9122
-rw-r--r--test/aarch64/test-trace-aarch64.cc51
-rw-r--r--test/aarch64/test-utils-aarch64.cc236
-rw-r--r--test/aarch64/test-utils-aarch64.h80
-rw-r--r--test/test-donkey.cc327
-rw-r--r--test/test-pool-manager.cc20
-rw-r--r--test/test-runner.h19
-rw-r--r--test/test-trace-reference/log-all806
-rw-r--r--test/test-trace-reference/log-all-colour806
-rw-r--r--test/test-trace-reference/log-branch203
-rw-r--r--test/test-trace-reference/log-branch-colour203
-rw-r--r--test/test-trace-reference/log-cpufeatures204
-rw-r--r--test/test-trace-reference/log-cpufeatures-colour204
-rw-r--r--test/test-trace-reference/log-cpufeatures-custom204
-rw-r--r--test/test-trace-reference/log-disasm204
-rw-r--r--test/test-trace-reference/log-disasm-colour204
-rw-r--r--test/test-trace-reference/log-regs291
-rw-r--r--test/test-trace-reference/log-regs-colour291
-rw-r--r--test/test-trace-reference/log-state399
-rw-r--r--test/test-trace-reference/log-state-colour399
-rw-r--r--test/test-trace-reference/log-vregs108
-rw-r--r--test/test-trace-reference/log-vregs-colour108
-rw-r--r--test/test-trace-reference/log-write203
-rw-r--r--test/test-trace-reference/log-write-colour203
-rwxr-xr-xtools/check_recent_coverage.sh51
-rwxr-xr-xtools/clang_tidy.py2
-rw-r--r--tools/code_coverage.log10
-rwxr-xr-xtools/code_coverage.sh72
-rwxr-xr-xtools/make_instruction_doc_aarch64.pl56
-rwxr-xr-xtools/test.py9
87 files changed, 59845 insertions, 15468 deletions
diff --git a/.clang-tidy b/.clang-tidy
index 57feba42..256ea716 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -28,6 +28,5 @@
Checks: '-clang-analyzer-security.insecureAPI.rand,google-*,-google-readability-todo,-google-readability-function-size,-google-build-using-namespace,-google-explicit-constructor,-google-readability-braces-around-statements,-google-readability-namespace-comments,-google-readability-casting'
HeaderFilterRegex: '\.h$'
AnalyzeTemporaryDtors: false
-CheckOptions:
...
diff --git a/.gitreview b/.gitreview
index fcf18723..9496adc1 100644
--- a/.gitreview
+++ b/.gitreview
@@ -2,3 +2,4 @@
host=review.linaro.org
port=29418
project=arm/vixl
+defaultbranch=sve2
diff --git a/Android.bp b/Android.bp
index e64dec16..eeaf1520 100644
--- a/Android.bp
+++ b/Android.bp
@@ -72,10 +72,10 @@ license {
cc_defaults {
name: "vixl-common",
host_supported: true,
- clang_cflags: ["-Wimplicit-fallthrough"],
cflags: [
"-Wall",
"-Werror",
+ "-Wimplicit-fallthrough",
],
cppflags: [
"-DVIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE=0",
@@ -163,6 +163,17 @@ cc_defaults {
srcs: ["src/*.cc"],
export_include_dirs: ["src"],
min_sdk_version: "S",
+
+ static: {
+ cflags: [
+ "-fvisibility=hidden",
+ ],
+ },
+ shared: {
+ cflags: [
+ "-fvisibility=protected",
+ ],
+ },
}
art_cc_library {
@@ -188,17 +199,6 @@ art_cc_library {
"com.android.art",
"com.android.art.debug",
],
-
- static: {
- cflags: [
- "-fvisibility=hidden",
- ],
- },
- shared: {
- cflags: [
- "-fvisibility=protected",
- ],
- },
}
art_cc_library {
@@ -233,6 +233,9 @@ cc_test_host {
local_include_dirs: [
"test",
],
+ exclude_srcs: [
+ "test/test-donkey.cc"
+ ],
srcs: [
"test/*.cc",
"test/aarch32/*.cc",
diff --git a/README.md b/README.md
index 57e94b7d..c1a0a7d0 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-VIXL: ARMv8 Runtime Code Generation Library, Development Version
-================================================================
+VIXL: ARMv8 Runtime Code Generation Library
+===========================================
Contents:
@@ -24,10 +24,14 @@ VIXL contains three components.
assembler. The simulator allows generated code to be run on another
architecture without the need for a full ISA model.
-The VIXL git repository can be found [on 'https://git.linaro.org'][vixl].
+The VIXL git repository can be found [on GitHub][vixl].
-Changes from previous versions of VIXL can be found in the
-[Changelog](doc/changelog.md).
+Build and Test Status
+---------------------
+
+ * [![Build Status](https://ci.linaro.org/buildStatus/icon?job=linaro-art-vixlpresubmit)](https://ci.linaro.org/job/linaro-art-vixlpresubmit/) Simulator
+ * [![Build Status](https://ci.linaro.org/buildStatus/icon?job=linaro-art-vixlpresubmit-native-armv8)](https://ci.linaro.org/job/linaro-art-vixlpresubmit-native-armv8/) Native
+ * [![Build Status](https://ci.linaro.org/buildStatus/icon?job=linaro-art-vixlpresubmit-macos)](https://ci.linaro.org/job/linaro-art-vixlpresubmit-macos/) MacOS
Licence
@@ -36,6 +40,8 @@ Licence
This software is covered by the licence described in the [LICENCE](LICENCE)
file.
+Contributions, as pull requests or via other means, are accepted under the terms
+of the same [LICENCE](LICENCE).
Requirements
============
@@ -63,6 +69,41 @@ Refer to the 'Usage' section for details.
Note that in Ubuntu 18.04, clang-tidy-4.0 will only work if the clang-4.0
package is also installed.
+Supported Arm Architecture Features
+===================================
+
+| Feature | VIXL CPUFeatures Flag | Notes |
+|------------|-----------------------|---------------------------------|
+| BTI | kBTI | Per-page enabling not supported |
+| DotProd | kDotProduct | |
+| FCMA | kFcma | |
+| FHM | kFHM | |
+| FP16 | kFPHalf, kNEONHalf | |
+| FRINTTS | kFrintToFixedSizedInt | |
+| FlagM | kFlagM | |
+| FlagM2 | kAXFlag | |
+| I8MM | kI8MM | |
+| JSCVT | kJSCVT | |
+| LOR | kLORegions | |
+| LRCPC | kRCpc | |
+| LRCPC2 | kRCpcImm | |
+| LSE | kAtomics | |
+| PAuth | kPAuth, kPAuthGeneric | Not ERETAA, ERETAB |
+| RAS | kRAS | |
+| RDM | kRDM | |
+| SVE | kSVE | |
+| SVE2 | kSVE2 | |
+| SVEBitPerm | kSVEBitPerm | |
+| SVEF32MM | kSVEF32MM | |
+| SVEF64MM | kSVEF64MM | |
+| SVEI8MM | kSVEI8MM | |
+
+Enable generating code for an architecture feature by combining a flag with
+the MacroAssembler's defaults. For example, to generate code for SVE, use
+`masm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);`.
+
+See [the cpu features header file](src/cpu-features.h) for more information.
+
Known Limitations
=================
@@ -161,9 +202,9 @@ selection.
Bug reports
===========
-Bug reports may be sent to vixl@arm.com. Please provide any steps required to
-recreate a bug, along with build environment and host system information.
-
+Bug reports may be made in the Issues section of GitHub, or sent to
+vixl@arm.com. Please provide any steps required to recreate a bug, along with
+build environment and host system information.
Usage
=====
@@ -213,11 +254,11 @@ aarch32_examples` or `scons aarch64_examples` from the root directory, or use
-[cpplint]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
+[cpplint]: https://github.com/google/styleguide/tree/gh-pages/cpplint
"Google's cpplint.py script."
-[vixl]: https://git.linaro.org/arm/vixl.git
- "The VIXL repository at 'https://git.linaro.org'."
+[vixl]: https://github.com/Linaro/vixl
+ "The VIXL repository on GitHub."
[getting-started-aarch32]: doc/aarch32/getting-started-aarch32.md
"Introduction to VIXL for AArch32."
diff --git a/SConstruct b/SConstruct
index 6c1f5638..bb8638c7 100644
--- a/SConstruct
+++ b/SConstruct
@@ -84,6 +84,7 @@ options = {
'-pedantic',
'-Wwrite-strings',
'-Wunused',
+ '-Wshadow',
'-Wno-missing-noreturn'],
'CPPPATH' : [config.dir_src_vixl]
},
@@ -115,6 +116,10 @@ options = {
'ubsan:on' : {
'CCFLAGS': ['-fsanitize=undefined'],
'LINKFLAGS': ['-fsanitize=undefined']
+ },
+ 'coverage:on' : {
+ 'CCFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping'],
+ 'LINKFLAGS': ['-fprofile-instr-generate', '-fcoverage-mapping']
}
}
@@ -255,6 +260,8 @@ vars.AddVariables(
'release', allowed_values=config.build_options_modes),
EnumVariable('ubsan', 'Enable undefined behavior checks',
'off', allowed_values=['on', 'off']),
+ EnumVariable('coverage', 'Enable code coverage measurement',
+ 'off', allowed_values=['on', 'off']),
EnumVariable('negative_testing',
'Enable negative testing (needs exceptions)',
'off', allowed_values=['on', 'off']),
@@ -482,7 +489,7 @@ top_level_targets.Add('', 'Build the VIXL library.')
# Common test code.
test_build_dir = PrepareVariantDir('test', TargetBuildDir(env))
-test_objects = [env.Object(Glob(join(test_build_dir, '*.cc')))]
+test_objects = [env.Object(Glob(join(test_build_dir, '*.cc'), exclude=join(test_build_dir, 'test-donkey.cc')))]
# AArch32 support
if CanTargetAArch32(env):
@@ -564,6 +571,15 @@ if CanTargetAArch64(env):
CPPPATH = env['CPPPATH'] + [config.dir_aarch64_examples] + [config.dir_tests])
test_objects.append(test_aarch64_examples_obj)
+ # The simulator test generator.
+ donkey_objects = []
+ donkey_objects.append(env.Object(
+ [join(test_build_dir, 'test-donkey.cc'), join(test_aarch64_build_dir, 'test-utils-aarch64.cc')],
+ CPPPATH = env['CPPPATH'] + [config.dir_tests],
+ CCFLAGS = [flag for flag in env['CCFLAGS'] if flag != '-O3']))
+ donkey = env.Program(join(test_build_dir, 'test-donkey'), donkey_objects, LIBS=[libvixl])
+ env.Alias('tests', donkey)
+
test = env.Program(join(test_build_dir, 'test-runner'), test_objects,
LIBS=[libvixl])
env.Alias('tests', test)
diff --git a/doc/aarch64/supported-instructions-aarch64.md b/doc/aarch64/supported-instructions-aarch64.md
index 1c16eb2b..5919354f 100644
--- a/doc/aarch64/supported-instructions-aarch64.md
+++ b/doc/aarch64/supported-instructions-aarch64.md
@@ -6,9 +6,25 @@ disassembler and simulator. The simulator may not support all floating point
operations to the precision required by AArch64 - please check the simulator
source code for details.
+#### AArch64 integer instructions ####
+[a](#integer-a) [b](#integer-b) [c](#integer-c) [d](#integer-d) [e](#integer-e) [h](#integer-h) [i](#integer-i) [l](#integer-l) [m](#integer-m) [n](#integer-n) [o](#integer-o) [p](#integer-p) [r](#integer-r) [s](#integer-s) [t](#integer-t) [u](#integer-u) [x](#integer-x)
+
+#### AArch64 floating point and NEON instructions ####
+[a](#float-a) [b](#float-b) [c](#float-c) [d](#float-d) [e](#float-e) [f](#float-f) [i](#float-i) [l](#float-l) [m](#float-m) [n](#float-n) [o](#float-o) [p](#float-p) [r](#float-r) [s](#float-s) [t](#float-t) [u](#float-u) [x](#float-x) [z](#float-z)
+
+#### AArch64 Scalable Vector Extension (SVE) instructions ####
+[a](#sve-a) [b](#sve-b) [c](#sve-c) [d](#sve-d) [e](#sve-e) [f](#sve-f) [h](#sve-h) [i](#sve-i) [l](#sve-l) [m](#sve-m) [n](#sve-n) [o](#sve-o) [p](#sve-p) [r](#sve-r) [s](#sve-s) [t](#sve-t) [u](#sve-u) [w](#sve-w) [x](#sve-x) [z](#sve-z)
+
+#### Additional or pseudo instructions ####
+[b](#pseudo-b) [d](#pseudo-d) [p](#pseudo-p)
+
+___
+
AArch64 integer instructions
----------------------------
+<a id="integer-a">
+
### ADC ###
Add with carry bit.
@@ -100,27 +116,6 @@ Authenticate Data address, using key A _(Armv8.3)_.
void autda(const Register& xd, const Register& xn)
-### AUTDA1716 ###
-
-Authenticate Data address, using key A, with address in x17 and modifier in x16 _(Armv8.3)_.
-
- void autda1716()
-
-
-### AUTDASP ###
-
-Authenticate Data address, using key A, with address in LR and modifier in SP _(Armv8.3)_.
-
- void autdasp()
-
-
-### AUTDAZ ###
-
-Authenticate Data address, using key A, with address in LR and a modifier of zero _(Armv8.3)_.
-
- void autdaz()
-
-
### AUTDB ###
Authenticate Data address, using key B _(Armv8.3)_.
@@ -128,27 +123,6 @@ Authenticate Data address, using key B _(Armv8.3)_.
void autdb(const Register& xd, const Register& xn)
-### AUTDB1716 ###
-
-Authenticate Data address, using key B, with address in x17 and modifier in x16 _(Armv8.3)_.
-
- void autdb1716()
-
-
-### AUTDBSP ###
-
-Authenticate Data address, using key B, with address in LR and modifier in SP _(Armv8.3)_.
-
- void autdbsp()
-
-
-### AUTDBZ ###
-
-Authenticate Data address, using key B, with address in LR and a modifier of zero _(Armv8.3)_.
-
- void autdbz()
-
-
### AUTDZA ###
Authenticate Data address, using key A and a modifier of zero _(Armv8.3)_.
@@ -240,6 +214,8 @@ Convert floating-point condition flags from Arm format to alternative format _(A
void axflag()
+<a id="integer-b">
+
### B ###
Conditional branch to PC offset.
@@ -417,6 +393,8 @@ Branch target identification.
void bti(BranchTargetIdentifier id)
+<a id="integer-c">
+
### CAS ###
Compare and Swap word or doubleword in memory _(Armv8.1)_.
@@ -773,6 +751,8 @@ Conditional select negation: rd = cond ? rn : -rm.
Condition cond)
+<a id="integer-d">
+
### DC ###
System data cache operation.
@@ -794,6 +774,8 @@ Data synchronization barrier.
void dsb(BarrierDomain domain, BarrierType type)
+<a id="integer-e">
+
### EON ###
Bitwise enor/xnor (A ^ ~B).
@@ -825,6 +807,8 @@ Extract.
unsigned lsb)
+<a id="integer-h">
+
### HINT ###
System hint (named type).
@@ -846,6 +830,8 @@ Halting debug-mode breakpoint.
void hlt(int code)
+<a id="integer-i">
+
### IC ###
System instruction cache operation.
@@ -860,6 +846,8 @@ Instruction synchronization barrier.
void isb()
+<a id="integer-l">
+
### LDADD ###
Atomic add on word or doubleword in memory _(Armv8.1)_
@@ -1896,6 +1884,8 @@ Logical shift right by variable.
void lsrv(const Register& rd, const Register& rn, const Register& rm)
+<a id="integer-m">
+
### MADD ###
Multiply and accumulate.
@@ -1915,6 +1905,13 @@ Negated multiply.
### MOV ###
+Move immediate, aliases for movz, movn, orr.
+
+ void mov(const Register& rd, uint64_t imm)
+
+
+### MOV ###
+
Move register to register.
void mov(const Register& rd, const Register& rn)
@@ -1979,6 +1976,8 @@ Move inverted operand to register.
void mvn(const Register& rd, const Operand& operand)
+<a id="integer-n">
+
### NEG ###
Negate.
@@ -2014,6 +2013,8 @@ No-op.
void nop()
+<a id="integer-o">
+
### ORN ###
Bitwise nor (A | ~B).
@@ -2028,6 +2029,8 @@ Bitwise or (A | B).
void orr(const Register& rd, const Register& rn, const Operand& operand)
+<a id="integer-p">
+
### PACDA ###
Pointer Authentication Code for Data address, using key A _(Armv8.3)_.
@@ -2035,27 +2038,6 @@ Pointer Authentication Code for Data address, using key A _(Armv8.3)_.
void pacda(const Register& xd, const Register& xn)
-### PACDA1716 ###
-
-Pointer Authentication Code for Data address, using key A, with address in x17 and modifier in x16 _(Armv8.3)_.
-
- void pacda1716()
-
-
-### PACDASP ###
-
-Pointer Authentication Code for Data address, using key A, with address in LR and modifier in SP _(Armv8.3)_.
-
- void pacdasp()
-
-
-### PACDAZ ###
-
-Pointer Authentication Code for Data address, using key A, with address in LR and a modifier of zero _(Armv8.3)_.
-
- void pacdaz()
-
-
### PACDB ###
Pointer Authentication Code for Data address, using key B _(Armv8.3)_.
@@ -2063,27 +2045,6 @@ Pointer Authentication Code for Data address, using key B _(Armv8.3)_.
void pacdb(const Register& xd, const Register& xn)
-### PACDB1716 ###
-
-Pointer Authentication Code for Data address, using key B, with address in x17 and modifier in x16 _(Armv8.3)_.
-
- void pacdb1716()
-
-
-### PACDBSP ###
-
-Pointer Authentication Code for Data address, using key B, with address in LR and modifier in SP _(Armv8.3)_.
-
- void pacdbsp()
-
-
-### PACDBZ ###
-
-Pointer Authentication Code for Data address, using key B, with address in LR and a modifier of zero _(Armv8.3)_.
-
- void pacdbz()
-
-
### PACDZA ###
Pointer Authentication Code for Data address, using key A and a modifier of zero _(Armv8.3)_.
@@ -2177,6 +2138,13 @@ Pointer Authentication Code for Instruction address, using key B and a modifier
### PRFM ###
+Prefetch from pc + imm19 << 2 (allowing unallocated hints).
+
+ void prfm(int op, int64_t imm19)
+
+
+### PRFM ###
+
Prefetch from pc + imm19 << 2.
void prfm(PrefetchOperation op, int64_t imm19)
@@ -2184,6 +2152,22 @@ Prefetch from pc + imm19 << 2.
### PRFM ###
+Prefetch memory (allowing unallocated hints).
+
+ void prfm(int op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option = PreferScaledOffset)
+
+
+### PRFM ###
+
+Prefetch memory in the literal pool (allowing unallocated hints).
+
+ void prfm(int op, RawLiteral* literal)
+
+
+### PRFM ###
+
Prefetch memory in the literal pool.
void prfm(PrefetchOperation op, RawLiteral* literal)
@@ -2207,6 +2191,17 @@ Prefetch memory (with unscaled offset).
LoadStoreScalingOption option = PreferUnscaledOffset)
+### PRFUM ###
+
+Prefetch memory (with unscaled offset, allowing unallocated hints).
+
+ void prfum(int op,
+ const MemOperand& addr,
+ LoadStoreScalingOption option = PreferUnscaledOffset)
+
+
+<a id="integer-r">
+
### RBIT ###
Bit reverse.
@@ -2284,6 +2279,8 @@ Rotate right by variable.
void rorv(const Register& rd, const Register& rn, const Register& rm)
+<a id="integer-s">
+
### SBC ###
Subtract with carry bit.
@@ -3056,6 +3053,8 @@ System instruction.
void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr)
+<a id="integer-t">
+
### TBNZ ###
Test bit and branch to PC offset if not zero.
@@ -3091,6 +3090,8 @@ Bit test and set flags.
void tst(const Register& rn, const Operand& operand)
+<a id="integer-u">
+
### UBFIZ ###
Unsigned bitfield insert with zero at right.
@@ -3121,6 +3122,13 @@ Unsigned bitfield extract.
unsigned width)
+### UDF ###
+
+Generate undefined instruction exception.
+
+ void udf(int code)
+
+
### UDIV ###
Unsigned integer divide.
@@ -3183,6 +3191,8 @@ Unsigned extend word.
void uxtw(const Register& rd, const Register& rn)
+<a id="integer-x">
+
### XAFLAG ###
Convert floating-point condition flags from alternative format to Arm format _(Armv8.5)_.
@@ -3215,6 +3225,8 @@ Strip Pointer Authentication Code of Instruction address in LR _(Armv8.3)_.
AArch64 floating point and NEON instructions
--------------------------------------------
+<a id="float-a">
+
### ABS ###
Absolute value.
@@ -3271,6 +3283,8 @@ Bitwise and.
void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-b">
+
### BIC ###
Bit clear immediate.
@@ -3306,6 +3320,8 @@ Bitwise select.
void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-c">
+
### CLS ###
Count leading sign bits.
@@ -3404,6 +3420,8 @@ Population count per byte.
void cnt(const VRegister& vd, const VRegister& vn)
+<a id="float-d">
+
### DUP ###
Duplicate general-purpose register to vector.
@@ -3418,6 +3436,8 @@ Duplicate vector element to vector or scalar.
void dup(const VRegister& vd, const VRegister& vn, int vn_index)
+<a id="float-e">
+
### EOR ###
Bitwise eor.
@@ -3435,6 +3455,8 @@ Extract vector from pair of vectors.
int index)
+<a id="float-f">
+
### FABD ###
FP absolute difference.
@@ -4211,6 +4233,34 @@ FP reciprocal exponent scalar.
void frecpx(const VRegister& vd, const VRegister& vn)
+### FRINT32X ###
+
+FP round to 32-bit integer, exact, implicit rounding _(Armv8.5)_.
+
+ void frint32x(const VRegister& vd, const VRegister& vn)
+
+
+### FRINT32Z ###
+
+FP round to 32-bit integer, towards zero _(Armv8.5)_.
+
+ void frint32z(const VRegister& vd, const VRegister& vn)
+
+
+### FRINT64X ###
+
+FP round to 64-bit integer, exact, implicit rounding _(Armv8.5)_.
+
+ void frint64x(const VRegister& vd, const VRegister& vn)
+
+
+### FRINT64Z ###
+
+FP round to 64-bit integer, towards zero _(Armv8.5)_.
+
+ void frint64z(const VRegister& vd, const VRegister& vn)
+
+
### FRINTA ###
FP round to integer, nearest with ties to away.
@@ -4288,6 +4338,8 @@ FP subtract.
void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-i">
+
### INS ###
Insert vector element from another vector element.
@@ -4305,6 +4357,8 @@ Insert vector element from general-purpose register.
void ins(const VRegister& vd, int vd_index, const Register& rn)
+<a id="float-l">
+
### LD1 ###
One-element single structure load to one lane.
@@ -4443,6 +4497,8 @@ Four-element single structure load to all lanes.
const MemOperand& src)
+<a id="float-m">
+
### MLA ###
Multiply-add by scalar element.
@@ -4559,6 +4615,8 @@ Vector move inverted immediate.
const int shift_amount = 0)
+<a id="float-n">
+
### NEG ###
Negate.
@@ -4573,6 +4631,8 @@ Bitwise not.
void not_(const VRegister& vd, const VRegister& vn)
+<a id="float-o">
+
### ORN ###
Bitwise orn.
@@ -4594,6 +4654,8 @@ Bitwise or.
void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-p">
+
### PMUL ###
Polynomial multiply.
@@ -4615,6 +4677,8 @@ Polynomial multiply long (second part).
void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-r">
+
### RADDHN ###
Rounding add narrow returning high half.
@@ -4685,6 +4749,8 @@ Rounding subtract narrow returning high half (second part).
void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-s">
+
### SABA ###
Signed absolute difference and accumulate.
@@ -4973,6 +5039,13 @@ Signed long multiply-sub by scalar element (second part).
int vm_index)
+### SMMLA ###
+
+Signed 8-bit integer matrix multiply-accumulate (vector).
+
+ void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
### SMOV ###
Signed move vector element to general-purpose register.
@@ -5546,6 +5619,16 @@ Subtract narrow returning high half (second part).
void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+### SUDOT ###
+
+Dot product with signed and unsigned integers (vector, by element).
+
+ void sudot(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index)
+
+
### SUQADD ###
Signed saturating accumulate of unsigned value.
@@ -5567,6 +5650,8 @@ Signed extend long (second part).
void sxtl2(const VRegister& vd, const VRegister& vn)
+<a id="float-t">
+
### TBL ###
Table lookup from four registers.
@@ -5661,6 +5746,8 @@ Transpose vectors (secondary).
void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-u">
+
### UABA ###
Unsigned absolute difference and accumulate.
@@ -5907,6 +5994,13 @@ Unsigned long multiply-sub by scalar element (second part).
int vm_index)
+### UMMLA ###
+
+Unsigned 8-bit integer matrix multiply-accumulate (vector).
+
+ void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
### UMOV ###
Unsigned move vector element to general-purpose register.
@@ -6067,6 +6161,23 @@ Unsigned rounding shift right by immediate and accumulate.
void ursra(const VRegister& vd, const VRegister& vn, int shift)
+### USDOT ###
+
+Dot Product with unsigned and signed integers (vector).
+
+ void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
+### USDOT ###
+
+Dot product with unsigned and signed integers (vector, by element).
+
+ void usdot(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index)
+
+
### USHL ###
Unsigned shift left by register.
@@ -6095,6 +6206,13 @@ Unsigned shift right by immediate.
void ushr(const VRegister& vd, const VRegister& vn, int shift)
+### USMMLA ###
+
+Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
+
+ void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+
+
### USQADD ###
Unsigned saturating accumulate of signed value.
@@ -6165,6 +6283,8 @@ Unzip vectors (secondary).
void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm)
+<a id="float-x">
+
### XTN ###
Extract narrow.
@@ -6179,6 +6299,8 @@ Extract narrow (second part).
void xtn2(const VRegister& vd, const VRegister& vn)
+<a id="float-z">
+
### ZIP1 ###
Zip vectors (primary).
@@ -6194,9 +6316,6086 @@ Zip vectors (secondary).
+AArch64 Scalable Vector Extension (SVE) instructions
+----------------------------------------------------
+
+<a id="sve-a">
+
+### ABS ###
+
+Absolute value (predicated).
+
+ void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### ADCLB ###
+
+Add with carry long (bottom).
+
+ void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### ADCLT ###
+
+Add with carry long (top).
+
+ void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### ADD ###
+
+Add immediate (unpredicated).
+
+ void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1)
+
+
+### ADD ###
+
+Add vectors (predicated).
+
+ void add(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### ADD ###
+
+Add vectors (unpredicated).
+
+ void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ADDHNB ###
+
+Add narrow high part (bottom).
+
+ void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ADDHNT ###
+
+Add narrow high part (top).
+
+ void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ADDP ###
+
+Add pairwise.
+
+ void addp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### ADDPL ###
+
+Add multiple of predicate register size to scalar register.
+
+ void addpl(const Register& xd, const Register& xn, int imm6)
+
+
+### ADDVL ###
+
+Add multiple of vector register size to scalar register.
+
+ void addvl(const Register& xd, const Register& xn, int imm6)
+
+
+### ADR ###
+
+Compute vector address.
+
+ void adr(const ZRegister& zd, const SVEMemOperand& addr)
+
+
+### AND ###
+
+Bitwise AND predicates.
+
+ void and_(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### AND ###
+
+Bitwise AND vectors (predicated).
+
+ void and_(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### AND ###
+
+Bitwise AND vectors (unpredicated).
+
+ void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### AND ###
+
+Bitwise AND with immediate (unpredicated).
+
+ void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm)
+
+
+### ANDS ###
+
+Bitwise AND predicates.
+
+ void ands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### ANDV ###
+
+Bitwise AND reduction to scalar.
+
+ void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### ASR ###
+
+Arithmetic shift right by 64-bit wide elements (predicated).
+
+ void asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### ASR ###
+
+Arithmetic shift right by 64-bit wide elements (unpredicated).
+
+ void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ASR ###
+
+Arithmetic shift right by immediate (predicated).
+
+ void asr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### ASR ###
+
+Arithmetic shift right by immediate (unpredicated).
+
+ void asr(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### ASRD ###
+
+Arithmetic shift right for divide by immediate (predicated).
+
+ void asrd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### ASRR ###
+
+Reversed arithmetic shift right by vector (predicated).
+
+ void asrr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+<a id="sve-b">
+
+### BCAX ###
+
+Bitwise clear and exclusive OR.
+
+ void bcax(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk)
+
+
+### BDEP ###
+
+Scatter lower bits into positions selected by bitmask.
+
+ void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### BEXT ###
+
+Gather lower bits from positions selected by bitmask.
+
+ void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### BGRP ###
+
+Group bits to right or left as selected by bitmask.
+
+ void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### BIC ###
+
+Bitwise clear bits using immediate (unpredicated).
+
+ void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm)
+
+
+### BIC ###
+
+Bitwise clear predicates.
+
+ void bic(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BIC ###
+
+Bitwise clear vectors (predicated).
+
+ void bic(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### BIC ###
+
+Bitwise clear vectors (unpredicated).
+
+ void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### BICS ###
+
+Bitwise clear predicates.
+
+ void bics(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BRKA ###
+
+Break after first true condition.
+
+ void brka(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### BRKAS ###
+
+Break after first true condition.
+
+ void brkas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### BRKB ###
+
+Break before first true condition.
+
+ void brkb(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### BRKBS ###
+
+Break before first true condition.
+
+ void brkbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### BRKN ###
+
+Propagate break to next partition.
+
+ void brkn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BRKNS ###
+
+Propagate break to next partition.
+
+ void brkns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BRKPA ###
+
+Break after first true condition, propagating from previous partition.
+
+ void brkpa(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BRKPAS ###
+
+Break after first true condition, propagating from previous partition.
+
+ void brkpas(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BRKPB ###
+
+Break before first true condition, propagating from previous partition.
+
+ void brkpb(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BRKPBS ###
+
+Break before first true condition, propagating from previous partition.
+
+ void brkpbs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### BSL ###
+
+Bitwise select.
+
+ void bsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk)
+
+
+### BSL1N ###
+
+Bitwise select with first input inverted.
+
+ void bsl1n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk)
+
+
+### BSL2N ###
+
+Bitwise select with second input inverted.
+
+ void bsl2n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk)
+
+
+<a id="sve-c"></a>
+
+### CADD ###
+
+Complex integer add with rotate.
+
+ void cadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot)
+
+
+### CDOT ###
+
+Complex integer dot product (indexed).
+
+ void cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot)
+
+
+### CDOT ###
+
+Complex integer dot product.
+
+ void cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot)
+
+
+### CLASTA ###
+
+Conditionally extract element after last to SIMD&FP scalar register.
+
+ void clasta(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm)
+
+
+### CLASTA ###
+
+Conditionally extract element after last to general-purpose register.
+
+ void clasta(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm)
+
+
+### CLASTA ###
+
+Conditionally extract element after last to vector register.
+
+ void clasta(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CLASTB ###
+
+Conditionally extract last element to SIMD&FP scalar register.
+
+ void clastb(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm)
+
+
+### CLASTB ###
+
+Conditionally extract last element to general-purpose register.
+
+ void clastb(const Register& rd,
+ const PRegister& pg,
+ const Register& rn,
+ const ZRegister& zm)
+
+
+### CLASTB ###
+
+Conditionally extract last element to vector register.
+
+ void clastb(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CLS ###
+
+Count leading sign bits (predicated).
+
+ void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### CLZ ###
+
+Count leading zero bits (predicated).
+
+ void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### CMLA ###
+
+Complex integer multiply-add with rotate (indexed).
+
+ void cmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot)
+
+
+### CMLA ###
+
+Complex integer multiply-add with rotate.
+
+ void cmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot)
+
+
+### CMP ###
+
+Compare vectors, with the comparison to perform selected by `cond`.
+
+ void cmp(Condition cond,
+ const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPEQ ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPEQ ###
+
+Compare vector to immediate.
+
+ void cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### CMPGE ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPGE ###
+
+Compare vector to immediate.
+
+ void cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### CMPGT ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPGT ###
+
+Compare vector to immediate.
+
+ void cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### CMPHI ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPHI ###
+
+Compare vector to immediate.
+
+ void cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7)
+
+
+### CMPHS ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPHS ###
+
+Compare vector to immediate.
+
+ void cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7)
+
+
+### CMPLE ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPLE ###
+
+Compare vector to immediate.
+
+ void cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### CMPLO ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPLO ###
+
+Compare vector to immediate.
+
+ void cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7)
+
+
+### CMPLS ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPLS ###
+
+Compare vector to immediate.
+
+ void cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ unsigned imm7)
+
+
+### CMPLT ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPLT ###
+
+Compare vector to immediate.
+
+ void cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### CMPNE ###
+
+Compare vector to 64-bit wide elements.
+
+ void cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### CMPNE ###
+
+Compare vector to immediate.
+
+ void cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### CNOT ###
+
+Logically invert boolean condition in vector (predicated).
+
+ void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### CNT ###
+
+Count non-zero bits (predicated).
+
+ void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### CNTB ###
+
+Set scalar to multiple of predicate constraint element count.
+
+ void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### CNTD ###
+
+Set scalar to multiple of predicate constraint element count.
+
+ void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### CNTH ###
+
+Set scalar to multiple of predicate constraint element count.
+
+ void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### CNTP ###
+
+Set scalar to active predicate element count.
+
+ void cntp(const Register& xd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### CNTW ###
+
+Set scalar to multiple of predicate constraint element count.
+
+ void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### COMPACT ###
+
+Shuffle active elements of vector to the right and fill with zero.
+
+ void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn)
+
+
+### CPY ###
+
+Copy SIMD&FP scalar register to vector elements (predicated).
+
+ void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn)
+
+
+### CPY ###
+
+Copy general-purpose register to vector elements (predicated).
+
+ void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn)
+
+
+### CPY ###
+
+Copy signed integer immediate to vector elements (predicated).
+
+ void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1)
+
+
+### CTERMEQ ###
+
+Compare and terminate loop.
+
+ void ctermeq(const Register& rn, const Register& rm)
+
+
+### CTERMNE ###
+
+Compare and terminate loop.
+
+ void ctermne(const Register& rn, const Register& rm)
+
+
+<a id="sve-d"></a>
+
+### DECB ###
+
+Decrement scalar by multiple of predicate constraint element count.
+
+ void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### DECD ###
+
+Decrement scalar by multiple of predicate constraint element count.
+
+ void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### DECD ###
+
+Decrement vector by multiple of predicate constraint element count.
+
+ void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### DECH ###
+
+Decrement scalar by multiple of predicate constraint element count.
+
+ void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### DECH ###
+
+Decrement vector by multiple of predicate constraint element count.
+
+ void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### DECP ###
+
+Decrement scalar by active predicate element count.
+
+ void decp(const Register& rdn, const PRegisterWithLaneSize& pg)
+
+
+### DECP ###
+
+Decrement vector by active predicate element count.
+
+ void decp(const ZRegister& zdn, const PRegister& pg)
+
+
+### DECW ###
+
+Decrement scalar by multiple of predicate constraint element count.
+
+ void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### DECW ###
+
+Decrement vector by multiple of predicate constraint element count.
+
+ void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### DUP ###
+
+Broadcast general-purpose register to vector elements (unpredicated).
+
+ void dup(const ZRegister& zd, const Register& xn)
+
+
+### DUP ###
+
+Broadcast indexed element to vector (unpredicated).
+
+ void dup(const ZRegister& zd, const ZRegister& zn, unsigned index)
+
+
+### DUP ###
+
+Broadcast signed immediate to vector elements (unpredicated).
+
+ void dup(const ZRegister& zd, int imm8, int shift = -1)
+
+
+### DUPM ###
+
+Broadcast logical bitmask immediate to vector (unpredicated).
+
+ void dupm(const ZRegister& zd, uint64_t imm)
+
+
+<a id="sve-e"></a>
+
+### EON ###
+
+Bitwise exclusive OR with inverted immediate (unpredicated).
+
+ void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm)
+
+
+### EOR ###
+
+Bitwise exclusive OR predicates.
+
+ void eor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### EOR ###
+
+Bitwise exclusive OR vectors (predicated).
+
+ void eor(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### EOR ###
+
+Bitwise exclusive OR vectors (unpredicated).
+
+ void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### EOR ###
+
+Bitwise exclusive OR with immediate (unpredicated).
+
+ void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm)
+
+
+### EOR3 ###
+
+Bitwise exclusive OR of three vectors.
+
+ void eor3(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk)
+
+
+### EORBT ###
+
+Interleaving exclusive OR (bottom, top).
+
+ void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### EORS ###
+
+Bitwise exclusive OR predicates.
+
+ void eors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### EORTB ###
+
+Interleaving exclusive OR (top, bottom).
+
+ void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### EORV ###
+
+Bitwise XOR reduction to scalar.
+
+ void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### EXT ###
+
+Extract vector from pair of vectors.
+
+ void ext(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned offset)
+
+
+<a id="sve-f"></a>
+
+### FABD ###
+
+Floating-point absolute difference (predicated).
+
+ void fabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FABS ###
+
+Floating-point absolute value (predicated).
+
+ void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FACGE ###
+
+Floating-point absolute compare vectors.
+
+ void facge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FACGT ###
+
+Floating-point absolute compare vectors.
+
+ void facgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FADD ###
+
+Floating-point add immediate (predicated).
+
+ void fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FADD ###
+
+Floating-point add vector (predicated).
+
+ void fadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FADD ###
+
+Floating-point add vector (unpredicated).
+
+ void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### FADDA ###
+
+Floating-point add strictly-ordered reduction, accumulating in scalar.
+
+ void fadda(const VRegister& vd,
+ const PRegister& pg,
+ const VRegister& vn,
+ const ZRegister& zm)
+
+
+### FADDP ###
+
+Floating-point add pairwise.
+
+ void faddp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FADDV ###
+
+Floating-point add recursive reduction to scalar.
+
+ void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### FCADD ###
+
+Floating-point complex add with rotate (predicated).
+
+ void fcadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot)
+
+
+### FCMEQ ###
+
+Floating-point compare vector with zero.
+
+ void fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero)
+
+
+### FCMEQ ###
+
+Floating-point compare vectors.
+
+ void fcmeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FCMGE ###
+
+Floating-point compare vector with zero.
+
+ void fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero)
+
+
+### FCMGE ###
+
+Floating-point compare vectors.
+
+ void fcmge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FCMGT ###
+
+Floating-point compare vector with zero.
+
+ void fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero)
+
+
+### FCMGT ###
+
+Floating-point compare vectors.
+
+ void fcmgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FCMLA ###
+
+Floating-point complex multiply-add by indexed values with rotate.
+
+ void fcmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot)
+
+
+### FCMLA ###
+
+Floating-point complex multiply-add with rotate (predicated).
+
+ void fcmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot)
+
+
+### FCMLE ###
+
+Floating-point compare vector with zero.
+
+ void fcmle(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero)
+
+
+### FCMLT ###
+
+Floating-point compare vector with zero.
+
+ void fcmlt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero)
+
+
+### FCMNE ###
+
+Floating-point compare vector with zero.
+
+ void fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ double zero)
+
+
+### FCMNE ###
+
+Floating-point compare vectors.
+
+ void fcmne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FCMUO ###
+
+Floating-point compare vectors.
+
+ void fcmuo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FCPY ###
+
+Copy floating-point immediate to vector elements (predicated).
+
+ void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm)
+
+
+### FCPY ###
+
+Copy half-precision floating-point immediate to vector elements (predicated).
+
+ void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm)
+
+
+### FCVT ###
+
+Floating-point convert precision (predicated).
+
+ void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FCVTLT ###
+
+Floating-point up convert long (top, predicated).
+
+ void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FCVTNT ###
+
+Floating-point down convert and narrow (top, predicated).
+
+ void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FCVTX ###
+
+Floating-point down convert, rounding to odd (predicated).
+
+ void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FCVTXNT ###
+
+Floating-point down convert, rounding to odd (top, predicated).
+
+ void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FCVTZS ###
+
+Floating-point convert to signed integer, rounding toward zero (predicated).
+
+ void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FCVTZU ###
+
+Floating-point convert to unsigned integer, rounding toward zero (predicated).
+
+ void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FDIV ###
+
+Floating-point divide by vector (predicated).
+
+ void fdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FDIVR ###
+
+Floating-point reversed divide by vector (predicated).
+
+ void fdivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FDUP ###
+
+Broadcast floating-point immediate to vector elements.
+
+ void fdup(const ZRegister& zd, double imm)
+
+
+### FDUP ###
+
+Broadcast half-precision floating-point immediate to vector elements.
+
+ void fdup(const ZRegister& zd, Float16 imm)
+
+
+### FEXPA ###
+
+Floating-point exponential accelerator.
+
+ void fexpa(const ZRegister& zd, const ZRegister& zn)
+
+
+### FLOGB ###
+
+Floating-point base 2 logarithm as integer.
+
+ void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FMAD ###
+
+Floating-point fused multiply-add vectors (predicated), writing multiplicand [Zdn = Za + Zdn * Zm].
+
+ void fmad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za)
+
+
+### FMAX ###
+
+Floating-point maximum (predicated).
+
+ void fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMAX ###
+
+Floating-point maximum with immediate (predicated).
+
+ void fmax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FMAXNM ###
+
+Floating-point maximum number (predicated).
+
+ void fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMAXNM ###
+
+Floating-point maximum number with immediate (predicated).
+
+ void fmaxnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FMAXNMP ###
+
+Floating-point maximum number pairwise.
+
+ void fmaxnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMAXNMV ###
+
+Floating-point maximum number recursive reduction to scalar.
+
+ void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### FMAXP ###
+
+Floating-point maximum pairwise.
+
+ void fmaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMAXV ###
+
+Floating-point maximum recursive reduction to scalar.
+
+ void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### FMIN ###
+
+Floating-point minimum (predicated).
+
+ void fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMIN ###
+
+Floating-point minimum with immediate (predicated).
+
+ void fmin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FMINNM ###
+
+Floating-point minimum number (predicated).
+
+ void fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMINNM ###
+
+Floating-point minimum number with immediate (predicated).
+
+ void fminnm(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FMINNMP ###
+
+Floating-point minimum number pairwise.
+
+ void fminnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMINNMV ###
+
+Floating-point minimum number recursive reduction to scalar.
+
+ void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### FMINP ###
+
+Floating-point minimum pairwise.
+
+ void fminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMINV ###
+
+Floating-point minimum recursive reduction to scalar.
+
+ void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### FMLA ###
+
+Floating-point fused multiply-add by indexed elements (Zda = Zda + Zn * Zm[indexed]).
+
+ void fmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### FMLA ###
+
+Floating-point fused multiply-add vectors (predicated), writing addend [Zda = Zda + Zn * Zm].
+
+ void fmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMLALB ###
+
+Half-precision floating-point multiply-add long to single-precision (bottom).
+
+ void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### FMLALB ###
+
+Half-precision floating-point multiply-add long to single-precision (bottom, indexed).
+
+ void fmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### FMLALT ###
+
+Half-precision floating-point multiply-add long to single-precision (top).
+
+ void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### FMLALT ###
+
+Half-precision floating-point multiply-add long to single-precision (top, indexed).
+
+ void fmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### FMLS ###
+
+Floating-point fused multiply-subtract by indexed elements (Zda = Zda + -Zn * Zm[indexed]).
+
+ void fmls(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### FMLS ###
+
+Floating-point fused multiply-subtract vectors (predicated), writing addend [Zda = Zda + -Zn * Zm].
+
+ void fmls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMLSLB ###
+
+Half-precision floating-point multiply-subtract long from single-precision (bottom).
+
+ void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### FMLSLB ###
+
+Half-precision floating-point multiply-subtract long from single-precision (bottom, indexed).
+
+ void fmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### FMLSLT ###
+
+Half-precision floating-point multiply-subtract long from single-precision (top).
+
+ void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### FMLSLT ###
+
+Half-precision floating-point multiply-subtract long from single-precision (top, indexed).
+
+ void fmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### FMMLA ###
+
+Floating-point matrix multiply-accumulate.
+
+ void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### FMOV ###
+
+Move 8-bit floating-point immediate to vector elements (predicated).
+
+ void fmov(const ZRegister& zd, const PRegisterM& pg, double imm)
+
+
+### FMOV ###
+
+Move 8-bit floating-point immediate to vector elements (unpredicated).
+
+ void fmov(const ZRegister& zd, double imm)
+
+
+### FMSB ###
+
+Floating-point fused multiply-subtract vectors (predicated), writing multiplicand [Zdn = Za + -Zdn * Zm].
+
+ void fmsb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za)
+
+
+### FMUL ###
+
+Floating-point multiply by immediate (predicated).
+
+ void fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FMUL ###
+
+Floating-point multiply by indexed elements.
+
+ void fmul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ unsigned index)
+
+
+### FMUL ###
+
+Floating-point multiply vectors (predicated).
+
+ void fmul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FMUL ###
+
+Floating-point multiply vectors (unpredicated).
+
+ void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### FMULX ###
+
+Floating-point multiply-extended vectors (predicated).
+
+ void fmulx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FNEG ###
+
+Floating-point negate (predicated).
+
+ void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FNMAD ###
+
+Floating-point negated fused multiply-add vectors (predicated), writing multiplicand [Zdn = -Za + -Zdn * Zm].
+
+ void fnmad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za)
+
+
+### FNMLA ###
+
+Floating-point negated fused multiply-add vectors (predicated), writing addend [Zda = -Zda + -Zn * Zm].
+
+ void fnmla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FNMLS ###
+
+Floating-point negated fused multiply-subtract vectors (predicated), writing addend [Zda = -Zda + Zn * Zm].
+
+ void fnmls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FNMSB ###
+
+Floating-point negated fused multiply-subtract vectors (predicated), writing multiplicand [Zdn = -Za + Zdn * Zm].
+
+ void fnmsb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za)
+
+
+### FRECPE ###
+
+Floating-point reciprocal estimate (unpredicated).
+
+ void frecpe(const ZRegister& zd, const ZRegister& zn)
+
+
+### FRECPS ###
+
+Floating-point reciprocal step (unpredicated).
+
+ void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### FRECPX ###
+
+Floating-point reciprocal exponent (predicated).
+
+ void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRINTA ###
+
+Floating-point round to integral value (predicated).
+
+ void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRINTI ###
+
+Floating-point round to integral value (predicated).
+
+ void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRINTM ###
+
+Floating-point round to integral value (predicated).
+
+ void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRINTN ###
+
+Floating-point round to integral value (predicated).
+
+ void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRINTP ###
+
+Floating-point round to integral value (predicated).
+
+ void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRINTX ###
+
+Floating-point round to integral value (predicated).
+
+ void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRINTZ ###
+
+Floating-point round to integral value (predicated).
+
+ void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FRSQRTE ###
+
+Floating-point reciprocal square root estimate (unpredicated).
+
+ void frsqrte(const ZRegister& zd, const ZRegister& zn)
+
+
+### FRSQRTS ###
+
+Floating-point reciprocal square root step (unpredicated).
+
+ void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### FSCALE ###
+
+Floating-point adjust exponent by vector (predicated).
+
+ void fscale(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FSQRT ###
+
+Floating-point square root (predicated).
+
+ void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### FSUB ###
+
+Floating-point subtract immediate (predicated).
+
+ void fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FSUB ###
+
+Floating-point subtract vectors (predicated).
+
+ void fsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FSUB ###
+
+Floating-point subtract vectors (unpredicated).
+
+ void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### FSUBR ###
+
+Floating-point reversed subtract from immediate (predicated).
+
+ void fsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ double imm)
+
+
+### FSUBR ###
+
+Floating-point reversed subtract vectors (predicated).
+
+ void fsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### FTMAD ###
+
+Floating-point trigonometric multiply-add coefficient.
+
+ void ftmad(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm3)
+
+
+### FTSMUL ###
+
+Floating-point trigonometric starting value.
+
+ void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### FTSSEL ###
+
+Floating-point trigonometric select coefficient.
+
+ void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+<a id="sve-h"></a>
+
+### HISTCNT ###
+
+Count matching elements in vector.
+
+ void histcnt(const ZRegister& zd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### HISTSEG ###
+
+Count matching elements in vector segments.
+
+ void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+<a id="sve-i"></a>
+
+### INCB ###
+
+Increment scalar by multiple of predicate constraint element count.
+
+ void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### INCD ###
+
+Increment scalar by multiple of predicate constraint element count.
+
+ void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### INCD ###
+
+Increment vector by multiple of predicate constraint element count.
+
+ void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### INCH ###
+
+Increment scalar by multiple of predicate constraint element count.
+
+ void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### INCH ###
+
+Increment vector by multiple of predicate constraint element count.
+
+ void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### INCP ###
+
+Increment scalar by active predicate element count.
+
+ void incp(const Register& rdn, const PRegisterWithLaneSize& pg)
+
+
+### INCP ###
+
+Increment vector by active predicate element count.
+
+ void incp(const ZRegister& zdn, const PRegister& pg)
+
+
+### INCW ###
+
+Increment scalar by multiple of predicate constraint element count.
+
+ void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### INCW ###
+
+Increment vector by multiple of predicate constraint element count.
+
+ void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### INDEX ###
+
+Create index starting from and incremented by general-purpose register.
+
+ void index(const ZRegister& zd, const Register& rn, const Register& rm)
+
+
+### INDEX ###
+
+Create index starting from and incremented by immediate.
+
+ void index(const ZRegister& zd, int start, int step)
+
+
+### INDEX ###
+
+Create index starting from general-purpose register and incremented by immediate.
+
+ void index(const ZRegister& zd, const Register& rn, int imm5)
+
+
+### INDEX ###
+
+Create index starting from immediate and incremented by general-purpose register.
+
+ void index(const ZRegister& zd, int imm5, const Register& rm)
+
+
+### INSR ###
+
+Insert SIMD&FP scalar register in shifted vector.
+
+ void insr(const ZRegister& zdn, const VRegister& vm)
+
+
+### INSR ###
+
+Insert general-purpose register in shifted vector.
+
+ void insr(const ZRegister& zdn, const Register& rm)
+
+
+<a id="sve-l">
+
+### LASTA ###
+
+Extract element after last to SIMD&FP scalar register.
+
+ void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### LASTA ###
+
+Extract element after last to general-purpose register.
+
+ void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn)
+
+
+### LASTB ###
+
+Extract last element to SIMD&FP scalar register.
+
+ void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### LASTB ###
+
+Extract last element to general-purpose register.
+
+ void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn)
+
+
+### LD1B ###
+
+Contiguous/gather load bytes to vector.
+
+ void ld1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1D ###
+
+Contiguous/gather load doublewords to vector.
+
+ void ld1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1H ###
+
+Contiguous/gather load halfwords to vector.
+
+ void ld1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RB ###
+
+Load and broadcast unsigned byte to vector.
+
+ void ld1rb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RD ###
+
+Load and broadcast doubleword to vector.
+
+ void ld1rd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RH ###
+
+Load and broadcast unsigned halfword to vector.
+
+ void ld1rh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1ROB ###
+
+Contiguous load and replicate thirty-two bytes.
+
+ void ld1rob(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1ROD ###
+
+Contiguous load and replicate four doublewords.
+
+ void ld1rod(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1ROH ###
+
+Contiguous load and replicate sixteen halfwords.
+
+ void ld1roh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1ROW ###
+
+Contiguous load and replicate eight words.
+
+ void ld1row(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RQB ###
+
+Contiguous load and replicate sixteen bytes.
+
+ void ld1rqb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RQD ###
+
+Contiguous load and replicate two doublewords.
+
+ void ld1rqd(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RQH ###
+
+Contiguous load and replicate eight halfwords.
+
+ void ld1rqh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RQW ###
+
+Contiguous load and replicate four words.
+
+ void ld1rqw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RSB ###
+
+Load and broadcast signed byte to vector.
+
+ void ld1rsb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RSH ###
+
+Load and broadcast signed halfword to vector.
+
+ void ld1rsh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RSW ###
+
+Load and broadcast signed word to vector.
+
+ void ld1rsw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1RW ###
+
+Load and broadcast unsigned word to vector.
+
+ void ld1rw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1SB ###
+
+Contiguous/gather load signed bytes to vector.
+
+ void ld1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1SH ###
+
+Contiguous/gather load signed halfwords to vector.
+
+ void ld1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1SW ###
+
+Contiguous/gather load signed words to vector.
+
+ void ld1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD1W ###
+
+Contiguous/gather load words to vector.
+
+ void ld1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD2B ###
+
+Contiguous load two-byte structures to two vectors.
+
+ void ld2b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD2D ###
+
+Contiguous load two-doubleword structures to two vectors.
+
+ void ld2d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD2H ###
+
+Contiguous load two-halfword structures to two vectors.
+
+ void ld2h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD2W ###
+
+Contiguous load two-word structures to two vectors.
+
+ void ld2w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD3B ###
+
+Contiguous load three-byte structures to three vectors.
+
+ void ld3b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD3D ###
+
+Contiguous load three-doubleword structures to three vectors.
+
+ void ld3d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD3H ###
+
+Contiguous load three-halfword structures to three vectors.
+
+ void ld3h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD3W ###
+
+Contiguous load three-word structures to three vectors.
+
+ void ld3w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD4B ###
+
+Contiguous load four-byte structures to four vectors.
+
+ void ld4b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD4D ###
+
+Contiguous load four-doubleword structures to four vectors.
+
+ void ld4d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD4H ###
+
+Contiguous load four-halfword structures to four vectors.
+
+ void ld4h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LD4W ###
+
+Contiguous load four-word structures to four vectors.
+
+ void ld4w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1B ###
+
+Contiguous load first-fault unsigned bytes to vector.
+
+ void ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1B ###
+
+Gather load first-fault unsigned bytes to vector (immediate index).
+
+ void ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### LDFF1B ###
+
+Gather load first-fault unsigned bytes to vector.
+
+ void ldff1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm)
+
+
+### LDFF1D ###
+
+Contiguous load first-fault doublewords to vector.
+
+ void ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1D ###
+
+Gather load first-fault doublewords to vector (immediate index).
+
+ void ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### LDFF1D ###
+
+Gather load first-fault doublewords to vector (vector index).
+
+ void ldff1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm)
+
+
+### LDFF1H ###
+
+Contiguous load first-fault unsigned halfwords to vector.
+
+ void ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1H ###
+
+Gather load first-fault unsigned halfwords to vector (immediate index).
+
+ void ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### LDFF1H ###
+
+Gather load first-fault unsigned halfwords to vector (vector index).
+
+ void ldff1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm)
+
+
+### LDFF1SB ###
+
+Contiguous load first-fault signed bytes to vector.
+
+ void ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1SB ###
+
+Gather load first-fault signed bytes to vector (immediate index).
+
+ void ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### LDFF1SB ###
+
+Gather load first-fault signed bytes to vector (vector index).
+
+ void ldff1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm)
+
+
+### LDFF1SH ###
+
+Contiguous load first-fault signed halfwords to vector.
+
+ void ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1SH ###
+
+Gather load first-fault signed halfwords to vector (immediate index).
+
+ void ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### LDFF1SH ###
+
+Gather load first-fault signed halfwords to vector (vector index).
+
+ void ldff1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm)
+
+
+### LDFF1SW ###
+
+Contiguous load first-fault signed words to vector.
+
+ void ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1SW ###
+
+Gather load first-fault signed words to vector (immediate index).
+
+ void ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### LDFF1SW ###
+
+Gather load first-fault signed words to vector (vector index).
+
+ void ldff1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm)
+
+
+### LDFF1W ###
+
+Contiguous load first-fault unsigned words to vector.
+
+ void ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDFF1W ###
+
+Gather load first-fault unsigned words to vector (immediate index).
+
+ void ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ int imm5)
+
+
+### LDFF1W ###
+
+Gather load first-fault unsigned words to vector (vector index).
+
+ void ldff1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const Register& xn,
+ const ZRegister& zm)
+
+
+### LDNF1B ###
+
+Contiguous load non-fault unsigned bytes to vector (immediate index).
+
+ void ldnf1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNF1D ###
+
+Contiguous load non-fault doublewords to vector (immediate index).
+
+ void ldnf1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNF1H ###
+
+Contiguous load non-fault unsigned halfwords to vector (immediate index).
+
+ void ldnf1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNF1SB ###
+
+Contiguous load non-fault signed bytes to vector (immediate index).
+
+ void ldnf1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNF1SH ###
+
+Contiguous load non-fault signed halfwords to vector (immediate index).
+
+ void ldnf1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNF1SW ###
+
+Contiguous load non-fault signed words to vector (immediate index).
+
+ void ldnf1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNF1W ###
+
+Contiguous load non-fault unsigned words to vector (immediate index).
+
+ void ldnf1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNT1B ###
+
+Contiguous load non-temporal bytes to vector.
+
+ void ldnt1b(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNT1D ###
+
+Contiguous load non-temporal doublewords to vector.
+
+ void ldnt1d(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNT1H ###
+
+Contiguous load non-temporal halfwords to vector.
+
+ void ldnt1h(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNT1SB ###
+
+Gather load non-temporal signed bytes.
+
+ void ldnt1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNT1SH ###
+
+Gather load non-temporal signed halfwords.
+
+ void ldnt1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNT1SW ###
+
+Gather load non-temporal signed words.
+
+ void ldnt1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDNT1W ###
+
+Contiguous load non-temporal words to vector.
+
+ void ldnt1w(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr)
+
+
+### LDR ###
+
+Load SVE predicate/vector register.
+
+ void ldr(const CPURegister& rt, const SVEMemOperand& addr)
+
+
+### LSL ###
+
+Logical shift left by 64-bit wide elements (predicated).
+
+ void lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### LSL ###
+
+Logical shift left by 64-bit wide elements (unpredicated).
+
+ void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### LSL ###
+
+Logical shift left by immediate (predicated).
+
+ void lsl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### LSL ###
+
+Logical shift left by immediate (unpredicated).
+
+ void lsl(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### LSLR ###
+
+Reversed logical shift left by vector (predicated).
+
+ void lslr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### LSR ###
+
+Logical shift right by 64-bit wide elements (predicated).
+
+ void lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### LSR ###
+
+Logical shift right by 64-bit wide elements (unpredicated).
+
+ void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### LSR ###
+
+Logical shift right by immediate (predicated).
+
+ void lsr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### LSR ###
+
+Logical shift right by immediate (unpredicated).
+
+ void lsr(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### LSRR ###
+
+Reversed logical shift right by vector (predicated).
+
+ void lsrr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+<a id="sve-m">
+
+### MAD ###
+
+Multiply-add vectors (predicated), writing multiplicand [Zdn = Za + Zdn * Zm].
+
+ void mad(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za)
+
+
+### MATCH ###
+
+Detect any matching elements, setting the condition flags.
+
+ void match(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### MLA ###
+
+Multiply-add to accumulator (indexed).
+
+ void mla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### MLA ###
+
+Multiply-add vectors (predicated), writing addend [Zda = Zda + Zn * Zm].
+
+ void mla(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### MLS ###
+
+Multiply-subtract from accumulator (indexed).
+
+ void mls(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### MLS ###
+
+Multiply-subtract vectors (predicated), writing addend [Zda = Zda - Zn * Zm].
+
+ void mls(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### MOV ###
+
+Move SIMD&FP scalar register to vector elements (predicated)
+
+ void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn)
+
+
+### MOV ###
+
+Move SIMD&FP scalar register to vector elements (unpredicated)
+
+ void mov(const ZRegister& zd, const VRegister& vn)
+
+
+### MOV ###
+
+Move general-purpose register to vector elements (predicated)
+
+ void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn)
+
+
+### MOV ###
+
+Move general-purpose register to vector elements (unpredicated)
+
+ void mov(const ZRegister& zd, const Register& xn)
+
+
+### MOV ###
+
+Move indexed element to vector elements (unpredicated)
+
+ void mov(const ZRegister& zd, const ZRegister& zn, unsigned index)
+
+
+### MOV ###
+
+Move logical bitmask immediate to vector (unpredicated).
+
+ void mov(const ZRegister& zd, uint64_t imm)
+
+
+### MOV ###
+
+Move predicates (merging)
+
+ void mov(const PRegisterWithLaneSize& pd,
+ const PRegisterM& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### MOV ###
+
+Move predicates (unpredicated)
+
+ void mov(const PRegister& pd, const PRegister& pn)
+
+
+### MOV ###
+
+Move predicates (zeroing)
+
+ void mov(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### MOV ###
+
+Move signed immediate to vector elements (unpredicated).
+
+ void mov(const ZRegister& zd, int imm8, int shift)
+
+
+### MOV ###
+
+Move signed integer immediate to vector elements (predicated)
+
+ void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1)
+
+
+### MOV ###
+
+Move vector elements (predicated)
+
+ void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### MOV ###
+
+Move vector register (unpredicated)
+
+ void mov(const ZRegister& zd, const ZRegister& zn)
+
+
+### MOVPRFX ###
+
+Move prefix (predicated).
+
+ void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn)
+
+
+### MOVPRFX ###
+
+Move prefix (unpredicated).
+
+ void movprfx(const ZRegister& zd, const ZRegister& zn)
+
+
+### MOVS ###
+
+Move predicate (unpredicated), setting the condition flags
+
+ void movs(const PRegister& pd, const PRegister& pn)
+
+
+### MOVS ###
+
+Move predicates (zeroing), setting the condition flags
+
+ void movs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### MSB ###
+
+Multiply-subtract vectors (predicated), writing multiplicand [Zdn = Za - Zdn * Zm].
+
+ void msb(const ZRegister& zdn,
+ const PRegisterM& pg,
+ const ZRegister& zm,
+ const ZRegister& za)
+
+
+### MUL ###
+
+Multiply (indexed).
+
+ void mul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### MUL ###
+
+Multiply by immediate (unpredicated).
+
+ void mul(const ZRegister& zd, const ZRegister& zn, int imm8)
+
+
+### MUL ###
+
+Multiply vectors (predicated).
+
+ void mul(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### MUL ###
+
+Multiply vectors (unpredicated).
+
+ void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+<a id="sve-n">
+
+### NAND ###
+
+Bitwise NAND predicates.
+
+ void nand(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### NANDS ###
+
+Bitwise NAND predicates.
+
+ void nands(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### NBSL ###
+
+Bitwise inverted select.
+
+ void nbsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk)
+
+
+### NEG ###
+
+Negate (predicated).
+
+ void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### NMATCH ###
+
+Detect no matching elements, setting the condition flags.
+
+ void nmatch(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### NOR ###
+
+Bitwise NOR predicates.
+
+ void nor(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### NORS ###
+
+Bitwise NOR predicates.
+
+ void nors(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### NOT ###
+
+Bitwise invert predicate.
+
+ void not_(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### NOT ###
+
+Bitwise invert vector (predicated).
+
+ void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### NOTS ###
+
+Bitwise invert predicate, setting the condition flags.
+
+ void nots(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+<a id="sve-o">
+
+### ORN ###
+
+Bitwise OR inverted predicate.
+
+ void orn(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### ORN ###
+
+Bitwise OR with inverted immediate (unpredicated).
+
+ void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm)
+
+
+### ORNS ###
+
+Bitwise OR inverted predicate.
+
+ void orns(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### ORR ###
+
+Bitwise OR predicate.
+
+ void orr(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### ORR ###
+
+Bitwise OR vectors (predicated).
+
+ void orr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### ORR ###
+
+Bitwise OR vectors (unpredicated).
+
+ void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ORR ###
+
+Bitwise OR with immediate (unpredicated).
+
+ void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm)
+
+
+### ORRS ###
+
+Bitwise OR predicate.
+
+ void orrs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### ORV ###
+
+Bitwise OR reduction to scalar.
+
+ void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+<a id="sve-p">
+
+### PFALSE ###
+
+Set all predicate elements to false.
+
+ void pfalse(const PRegisterWithLaneSize& pd)
+
+
+### PFIRST ###
+
+Set the first active predicate element to true.
+
+ void pfirst(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### PMUL ###
+
+Polynomial multiply vectors (unpredicated).
+
+ void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### PMULLB ###
+
+Polynomial multiply long (bottom).
+
+ void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### PMULLT ###
+
+Polynomial multiply long (top).
+
+ void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### PNEXT ###
+
+Find next active predicate.
+
+ void pnext(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn)
+
+
+### PRFB ###
+
+Prefetch bytes.
+
+ void prfb(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### PRFD ###
+
+Prefetch doublewords.
+
+ void prfd(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### PRFH ###
+
+Prefetch halfwords.
+
+ void prfh(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### PRFW ###
+
+Prefetch words.
+
+ void prfw(PrefetchOperation prfop,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### PTEST ###
+
+Set condition flags for predicate.
+
+ void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn)
+
+
+### PTRUE ###
+
+Initialise predicate from named constraint.
+
+ void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL)
+
+
+### PTRUES ###
+
+Initialise predicate from named constraint.
+
+ void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL)
+
+
+### PUNPKHI ###
+
+Unpack and widen half of predicate.
+
+ void punpkhi(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn)
+
+
+### PUNPKLO ###
+
+Unpack and widen half of predicate.
+
+ void punpklo(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn)
+
+
+<a id="sve-r">
+
+### RADDHNB ###
+
+Rounding add narrow high part (bottom).
+
+ void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### RADDHNT ###
+
+Rounding add narrow high part (top).
+
+ void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### RBIT ###
+
+Reverse bits (predicated).
+
+ void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### RDFFR ###
+
+Read the first-fault register.
+
+ void rdffr(const PRegisterWithLaneSize& pd)
+
+
+### RDFFR ###
+
+Return predicate of successfully loaded elements.
+
+ void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg)
+
+
+### RDFFRS ###
+
+Return predicate of successfully loaded elements.
+
+ void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg)
+
+
+### RDVL ###
+
+Read multiple of vector register size to scalar register.
+
+ void rdvl(const Register& xd, int imm6)
+
+
+### REV ###
+
+Reverse all elements in a predicate.
+
+ void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn)
+
+
+### REV ###
+
+Reverse all elements in a vector (unpredicated).
+
+ void rev(const ZRegister& zd, const ZRegister& zn)
+
+
+### REVB ###
+
+Reverse bytes / halfwords / words within elements (predicated).
+
+ void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### REVH ###
+
+Reverse bytes / halfwords / words within elements (predicated).
+
+ void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### REVW ###
+
+Reverse bytes / halfwords / words within elements (predicated).
+
+ void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### RSHRNB ###
+
+Rounding shift right narrow by immediate (bottom).
+
+ void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### RSHRNT ###
+
+Rounding shift right narrow by immediate (top).
+
+ void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### RSUBHNB ###
+
+Rounding subtract narrow high part (bottom).
+
+ void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### RSUBHNT ###
+
+Rounding subtract narrow high part (top).
+
+ void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+<a id="sve-s">
+
+### SABA ###
+
+Signed absolute difference and accumulate.
+
+ void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SABALB ###
+
+Signed absolute difference and accumulate long (bottom).
+
+ void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SABALT ###
+
+Signed absolute difference and accumulate long (top).
+
+ void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SABD ###
+
+Signed absolute difference (predicated).
+
+ void sabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SABDLB ###
+
+Signed absolute difference long (bottom).
+
+ void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SABDLT ###
+
+Signed absolute difference long (top).
+
+ void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SADALP ###
+
+Signed add and accumulate long pairwise.
+
+ void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SADDLB ###
+
+Signed add long (bottom).
+
+ void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SADDLBT ###
+
+Signed add long (bottom + top).
+
+ void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SADDLT ###
+
+Signed add long (top).
+
+ void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SADDV ###
+
+Signed add reduction to scalar.
+
+ void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn)
+
+
+### SADDWB ###
+
+Signed add wide (bottom).
+
+ void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SADDWT ###
+
+Signed add wide (top).
+
+ void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SBCLB ###
+
+Subtract with carry long (bottom).
+
+ void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SBCLT ###
+
+Subtract with carry long (top).
+
+ void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SCVTF ###
+
+Signed integer convert to floating-point (predicated).
+
+ void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SDIV ###
+
+Signed divide (predicated).
+
+ void sdiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SDIVR ###
+
+Signed reversed divide (predicated).
+
+ void sdivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SDOT ###
+
+Signed dot product by indexed quadtuplet.
+
+ void sdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SDOT ###
+
+Signed dot product.
+
+ void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SEL ###
+
+Conditionally select elements from two predicates.
+
+ void sel(const PRegisterWithLaneSize& pd,
+ const PRegister& pg,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### SEL ###
+
+Conditionally select elements from two vectors.
+
+ void sel(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SETFFR ###
+
+Initialise the first-fault register to all true.
+
+ void setffr()
+
+
+### SHADD ###
+
+Signed halving addition.
+
+ void shadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SHRNB ###
+
+Shift right narrow by immediate (bottom).
+
+ void shrnb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SHRNT ###
+
+Shift right narrow by immediate (top).
+
+ void shrnt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SHSUB ###
+
+Signed halving subtract.
+
+ void shsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SHSUBR ###
+
+Signed halving subtract reversed vectors.
+
+ void shsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SLI ###
+
+Shift left and insert (immediate).
+
+ void sli(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SMAX ###
+
+Signed maximum vectors (predicated).
+
+ void smax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SMAX ###
+
+Signed maximum with immediate (unpredicated).
+
+ void smax(const ZRegister& zd, const ZRegister& zn, int imm8)
+
+
+### SMAXP ###
+
+Signed maximum pairwise.
+
+ void smaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SMAXV ###
+
+Signed maximum reduction to scalar.
+
+ void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### SMIN ###
+
+Signed minimum vectors (predicated).
+
+ void smin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SMIN ###
+
+Signed minimum with immediate (unpredicated).
+
+ void smin(const ZRegister& zd, const ZRegister& zn, int imm8)
+
+
+### SMINP ###
+
+Signed minimum pairwise.
+
+ void sminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SMINV ###
+
+Signed minimum reduction to scalar.
+
+ void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### SMLALB ###
+
+Signed multiply-add long to accumulator (bottom).
+
+ void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMLALB ###
+
+Signed multiply-add long to accumulator (bottom, indexed).
+
+ void smlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SMLALT ###
+
+Signed multiply-add long to accumulator (top).
+
+ void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMLALT ###
+
+Signed multiply-add long to accumulator (top, indexed).
+
+ void smlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SMLSLB ###
+
+Signed multiply-subtract long from accumulator (bottom).
+
+ void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMLSLB ###
+
+Signed multiply-subtract long from accumulator (bottom, indexed).
+
+ void smlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SMLSLT ###
+
+Signed multiply-subtract long from accumulator (top).
+
+ void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMLSLT ###
+
+Signed multiply-subtract long from accumulator (top, indexed).
+
+ void smlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SMMLA ###
+
+Signed integer matrix multiply-accumulate.
+
+ void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMULH ###
+
+Signed multiply returning high half (predicated).
+
+ void smulh(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SMULH ###
+
+Signed multiply returning high half (unpredicated).
+
+ void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMULLB ###
+
+Signed multiply long (bottom).
+
+ void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMULLB ###
+
+Signed multiply long (bottom, indexed).
+
+ void smullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SMULLT ###
+
+Signed multiply long (top).
+
+ void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SMULLT ###
+
+Signed multiply long (top, indexed).
+
+ void smullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SPLICE ###
+
+Splice two vectors under predicate control.
+
+ void splice(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQABS ###
+
+Signed saturating absolute value.
+
+ void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SQADD ###
+
+Signed saturating add immediate (unpredicated).
+
+ void sqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1)
+
+
+### SQADD ###
+
+Signed saturating add vectors (unpredicated).
+
+ void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQADD ###
+
+Signed saturating addition (predicated).
+
+ void sqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQCADD ###
+
+Saturating complex integer add with rotate.
+
+ void sqcadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot)
+
+
+### SQDECB ###
+
+Signed saturating decrement scalar by multiple of 8-bit predicate constraint element count.
+
+ void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQDECB ###
+
+Signed saturating decrement scalar by multiple of 8-bit predicate constraint element count.
+
+ void sqdecb(const Register& xd,
+ const Register& wn,
+ int pattern,
+ int multiplier)
+
+
+### SQDECD ###
+
+Signed saturating decrement scalar by multiple of 64-bit predicate constraint element count.
+
+ void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQDECD ###
+
+Signed saturating decrement scalar by multiple of 64-bit predicate constraint element count.
+
+ void sqdecd(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1)
+
+
+### SQDECD ###
+
+Signed saturating decrement vector by multiple of 64-bit predicate constraint element count.
+
+ void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQDECH ###
+
+Signed saturating decrement scalar by multiple of 16-bit predicate constraint element count.
+
+ void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQDECH ###
+
+Signed saturating decrement scalar by multiple of 16-bit predicate constraint element count.
+
+ void sqdech(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1)
+
+
+### SQDECH ###
+
+Signed saturating decrement vector by multiple of 16-bit predicate constraint element count.
+
+ void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQDECP ###
+
+Signed saturating decrement scalar by active predicate element count.
+
+ void sqdecp(const Register& xd,
+ const PRegisterWithLaneSize& pg,
+ const Register& wn)
+
+
+### SQDECP ###
+
+Signed saturating decrement scalar by active predicate element count.
+
+ void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg)
+
+
+### SQDECP ###
+
+Signed saturating decrement vector by active predicate element count.
+
+ void sqdecp(const ZRegister& zdn, const PRegister& pg)
+
+
+### SQDECW ###
+
+Signed saturating decrement scalar by multiple of 32-bit predicate constraint element count.
+
+ void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQDECW ###
+
+Signed saturating decrement scalar by multiple of 32-bit predicate constraint element count.
+
+ void sqdecw(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1)
+
+
+### SQDECW ###
+
+Signed saturating decrement vector by multiple of 32-bit predicate constraint element count.
+
+ void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQDMLALB ###
+
+Signed saturating doubling multiply-add long to accumulator (bottom).
+
+ void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQDMLALB ###
+
+Signed saturating doubling multiply-add long to accumulator (bottom, indexed).
+
+ void sqdmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQDMLALBT ###
+
+Signed saturating doubling multiply-add long to accumulator (bottom x top).
+
+ void sqdmlalbt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQDMLALT ###
+
+Signed saturating doubling multiply-add long to accumulator (top).
+
+ void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQDMLALT ###
+
+Signed saturating doubling multiply-add long to accumulator (top, indexed).
+
+ void sqdmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQDMLSLB ###
+
+Signed saturating doubling multiply-subtract long from accumulator (bottom).
+
+ void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQDMLSLB ###
+
+Signed saturating doubling multiply-subtract long from accumulator (bottom, indexed).
+
+ void sqdmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQDMLSLBT ###
+
+Signed saturating doubling multiply-subtract long from accumulator (bottom x top).
+
+ void sqdmlslbt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQDMLSLT ###
+
+Signed saturating doubling multiply-subtract long from accumulator (top).
+
+ void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQDMLSLT ###
+
+Signed saturating doubling multiply-subtract long from accumulator (top, indexed).
+
+ void sqdmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQDMULH ###
+
+Signed saturating doubling multiply high (indexed).
+
+ void sqdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQDMULH ###
+
+Signed saturating doubling multiply high (unpredicated).
+
+ void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQDMULLB ###
+
+Signed saturating doubling multiply long (bottom).
+
+ void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQDMULLB ###
+
+Signed saturating doubling multiply long (bottom, indexed).
+
+ void sqdmullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQDMULLT ###
+
+Signed saturating doubling multiply long (top).
+
+ void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQDMULLT ###
+
+Signed saturating doubling multiply long (top, indexed).
+
+ void sqdmullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQINCB ###
+
+Signed saturating increment scalar by multiple of 8-bit predicate constraint element count.
+
+ void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQINCB ###
+
+Signed saturating increment scalar by multiple of 8-bit predicate constraint element count.
+
+ void sqincb(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1)
+
+
+### SQINCD ###
+
+Signed saturating increment scalar by multiple of 64-bit predicate constraint element count.
+
+ void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQINCD ###
+
+Signed saturating increment scalar by multiple of 64-bit predicate constraint element count.
+
+ void sqincd(const Register& xd,
+ const Register& wn,
+ int pattern,
+ int multiplier)
+
+
+### SQINCD ###
+
+Signed saturating increment vector by multiple of 64-bit predicate constraint element count.
+
+ void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQINCH ###
+
+Signed saturating increment scalar by multiple of 16-bit predicate constraint element count.
+
+ void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQINCH ###
+
+Signed saturating increment scalar by multiple of 16-bit predicate constraint element count.
+
+ void sqinch(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1)
+
+
+### SQINCH ###
+
+Signed saturating increment vector by multiple of 16-bit predicate constraint element count.
+
+ void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQINCP ###
+
+Signed saturating increment scalar by active predicate element count.
+
+ void sqincp(const Register& xd,
+ const PRegisterWithLaneSize& pg,
+ const Register& wn)
+
+
+### SQINCP ###
+
+Signed saturating increment scalar by active predicate element count.
+
+ void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg)
+
+
+### SQINCP ###
+
+Signed saturating increment vector by active predicate element count.
+
+ void sqincp(const ZRegister& zdn, const PRegister& pg)
+
+
+### SQINCW ###
+
+Signed saturating increment scalar by multiple of 32-bit predicate constraint element count.
+
+ void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQINCW ###
+
+Signed saturating increment scalar by multiple of 32-bit predicate constraint element count.
+
+ void sqincw(const Register& xd,
+ const Register& wn,
+ int pattern = SVE_ALL,
+ int multiplier = 1)
+
+
+### SQINCW ###
+
+Signed saturating increment vector by multiple of 32-bit predicate constraint element count.
+
+ void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### SQNEG ###
+
+Signed saturating negate.
+
+ void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SQRDCMLAH ###
+
+Saturating rounding doubling complex integer multiply-add high with rotate (indexed).
+
+ void sqrdcmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot)
+
+
+### SQRDCMLAH ###
+
+Saturating rounding doubling complex integer multiply-add high with rotate.
+
+ void sqrdcmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot)
+
+
+### SQRDMLAH ###
+
+Signed saturating rounding doubling multiply-add high to accumulator (indexed).
+
+ void sqrdmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQRDMLAH ###
+
+Signed saturating rounding doubling multiply-add high to accumulator (unpredicated).
+
+ void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQRDMLSH ###
+
+Signed saturating rounding doubling multiply-subtract high from accumulator (indexed).
+
+ void sqrdmlsh(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQRDMLSH ###
+
+Signed saturating rounding doubling multiply-subtract high from accumulator (unpredicated).
+
+ void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQRDMULH ###
+
+Signed saturating rounding doubling multiply high (indexed).
+
+ void sqrdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SQRDMULH ###
+
+Signed saturating rounding doubling multiply high (unpredicated).
+
+ void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQRSHL ###
+
+Signed saturating rounding shift left by vector (predicated).
+
+ void sqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQRSHLR ###
+
+Signed saturating rounding shift left reversed vectors (predicated).
+
+ void sqrshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQRSHRNB ###
+
+Signed saturating rounding shift right narrow by immediate (bottom).
+
+ void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQRSHRNT ###
+
+Signed saturating rounding shift right narrow by immediate (top).
+
+ void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQRSHRUNB ###
+
+Signed saturating rounding shift right unsigned narrow by immediate (bottom).
+
+ void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQRSHRUNT ###
+
+Signed saturating rounding shift right unsigned narrow by immediate (top).
+
+ void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQSHL ###
+
+Signed saturating shift left by immediate.
+
+ void sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### SQSHL ###
+
+Signed saturating shift left by vector (predicated).
+
+ void sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQSHLR ###
+
+Signed saturating shift left reversed vectors (predicated).
+
+ void sqshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQSHLU ###
+
+Signed saturating shift left unsigned by immediate.
+
+ void sqshlu(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### SQSHRNB ###
+
+Signed saturating shift right narrow by immediate (bottom).
+
+ void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQSHRNT ###
+
+Signed saturating shift right narrow by immediate (top).
+
+ void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQSHRUNB ###
+
+Signed saturating shift right unsigned narrow by immediate (bottom).
+
+ void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQSHRUNT ###
+
+Signed saturating shift right unsigned narrow by immediate (top).
+
+ void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SQSUB ###
+
+Signed saturating subtract immediate (unpredicated).
+
+ void sqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1)
+
+
+### SQSUB ###
+
+Signed saturating subtract vectors (unpredicated).
+
+ void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SQSUB ###
+
+Signed saturating subtraction (predicated).
+
+ void sqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQSUBR ###
+
+Signed saturating subtraction reversed vectors (predicated).
+
+ void sqsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SQXTNB ###
+
+Signed saturating extract narrow (bottom).
+
+ void sqxtnb(const ZRegister& zd, const ZRegister& zn)
+
+
+### SQXTNT ###
+
+Signed saturating extract narrow (top).
+
+ void sqxtnt(const ZRegister& zd, const ZRegister& zn)
+
+
+### SQXTUNB ###
+
+Signed saturating unsigned extract narrow (bottom).
+
+ void sqxtunb(const ZRegister& zd, const ZRegister& zn)
+
+
+### SQXTUNT ###
+
+Signed saturating unsigned extract narrow (top).
+
+ void sqxtunt(const ZRegister& zd, const ZRegister& zn)
+
+
+### SRHADD ###
+
+Signed rounding halving addition.
+
+ void srhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SRI ###
+
+Shift right and insert (immediate).
+
+ void sri(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SRSHL ###
+
+Signed rounding shift left by vector (predicated).
+
+ void srshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SRSHLR ###
+
+Signed rounding shift left reversed vectors (predicated).
+
+ void srshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SRSHR ###
+
+Signed rounding shift right by immediate.
+
+ void srshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### SRSRA ###
+
+Signed rounding shift right and accumulate (immediate).
+
+ void srsra(const ZRegister& zda, const ZRegister& zn, int shift)
+
+
+### SSHLLB ###
+
+Signed shift left long by immediate (bottom).
+
+ void sshllb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SSHLLT ###
+
+Signed shift left long by immediate (top).
+
+ void sshllt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### SSRA ###
+
+Signed shift right and accumulate (immediate).
+
+ void ssra(const ZRegister& zda, const ZRegister& zn, int shift)
+
+
+### SSUBLB ###
+
+Signed subtract long (bottom).
+
+ void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBLBT ###
+
+Signed subtract long (bottom - top).
+
+ void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBLT ###
+
+Signed subtract long (top).
+
+ void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBLTB ###
+
+Signed subtract long (top - bottom).
+
+ void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBWB ###
+
+Signed subtract wide (bottom).
+
+ void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SSUBWT ###
+
+Signed subtract wide (top).
+
+ void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ST1B ###
+
+Contiguous/scatter store bytes from vector.
+
+ void st1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST1D ###
+
+Contiguous/scatter store doublewords from vector.
+
+ void st1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST1H ###
+
+Contiguous/scatter store halfwords from vector.
+
+ void st1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST1W ###
+
+Contiguous/scatter store words from vector.
+
+ void st1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST2B ###
+
+Contiguous store two-byte structures from two vectors.
+
+ void st2b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST2D ###
+
+Contiguous store two-doubleword structures from two vectors.
+
+ void st2d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST2H ###
+
+Contiguous store two-halfword structures from two vectors.
+
+ void st2h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST2W ###
+
+Contiguous store two-word structures from two vectors.
+
+ void st2w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST3B ###
+
+Contiguous store three-byte structures from three vectors.
+
+ void st3b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST3D ###
+
+Contiguous store three-doubleword structures from three vectors.
+
+ void st3d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST3H ###
+
+Contiguous store three-halfword structures from three vectors.
+
+ void st3h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST3W ###
+
+Contiguous store three-word structures from three vectors.
+
+ void st3w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST4B ###
+
+Contiguous store four-byte structures from four vectors.
+
+ void st4b(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST4D ###
+
+Contiguous store four-doubleword structures from four vectors.
+
+ void st4d(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST4H ###
+
+Contiguous store four-halfword structures from four vectors.
+
+ void st4h(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### ST4W ###
+
+Contiguous store four-word structures from four vectors.
+
+ void st4w(const ZRegister& zt1,
+ const ZRegister& zt2,
+ const ZRegister& zt3,
+ const ZRegister& zt4,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### STNT1B ###
+
+Contiguous store non-temporal bytes from vector.
+
+ void stnt1b(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### STNT1D ###
+
+Contiguous store non-temporal doublewords from vector.
+
+ void stnt1d(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### STNT1H ###
+
+Contiguous store non-temporal halfwords from vector.
+
+ void stnt1h(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### STNT1W ###
+
+Contiguous store non-temporal words from vector.
+
+ void stnt1w(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr)
+
+
+### STR ###
+
+Store SVE predicate/vector register.
+
+ void str(const CPURegister& rt, const SVEMemOperand& addr)
+
+
+### SUB ###
+
+Subtract immediate (unpredicated).
+
+ void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1)
+
+
+### SUB ###
+
+Subtract vectors (predicated).
+
+ void sub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SUB ###
+
+Subtract vectors (unpredicated).
+
+ void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SUBHNB ###
+
+Subtract narrow high part (bottom).
+
+ void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SUBHNT ###
+
+Subtract narrow high part (top).
+
+ void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### SUBR ###
+
+Reversed subtract from immediate (unpredicated).
+
+ void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1)
+
+
+### SUBR ###
+
+Reversed subtract vectors (predicated).
+
+ void subr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SUDOT ###
+
+Signed by unsigned integer indexed dot product.
+
+ void sudot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### SUNPKHI ###
+
+Signed unpack and extend half of vector.
+
+ void sunpkhi(const ZRegister& zd, const ZRegister& zn)
+
+
+### SUNPKLO ###
+
+Signed unpack and extend half of vector.
+
+ void sunpklo(const ZRegister& zd, const ZRegister& zn)
+
+
+### SUQADD ###
+
+Signed saturating addition of unsigned value.
+
+ void suqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### SXTB ###
+
+Signed byte extend (predicated).
+
+ void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SXTH ###
+
+Signed halfword extend (predicated).
+
+ void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### SXTW ###
+
+Signed word extend (predicated).
+
+ void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+<a id="sve-t"></a>
+
+### TBL ###
+
+Programmable table lookup in one or two vector table (zeroing).
+
+ void tbl(const ZRegister& zd,
+ const ZRegister& zn1,
+ const ZRegister& zn2,
+ const ZRegister& zm)
+
+
+### TBL ###
+
+Programmable table lookup/permute using vector of indices into a vector.
+
+ void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### TBX ###
+
+Programmable table lookup in single vector table (merging).
+
+ void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### TRN1 ###
+
+Interleave even or odd elements from two predicates.
+
+ void trn1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### TRN1 ###
+
+Interleave even or odd elements from two vectors.
+
+ void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### TRN2 ###
+
+Interleave even or odd elements from two predicates.
+
+ void trn2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### TRN2 ###
+
+Interleave even or odd elements from two vectors.
+
+ void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+<a id="sve-u"></a>
+
+### UABA ###
+
+Unsigned absolute difference and accumulate.
+
+ void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UABALB ###
+
+Unsigned absolute difference and accumulate long (bottom).
+
+ void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UABALT ###
+
+Unsigned absolute difference and accumulate long (top).
+
+ void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UABD ###
+
+Unsigned absolute difference (predicated).
+
+ void uabd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UABDLB ###
+
+Unsigned absolute difference long (bottom).
+
+ void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UABDLT ###
+
+Unsigned absolute difference long (top).
+
+ void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UADALP ###
+
+Unsigned add and accumulate long pairwise.
+
+ void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn)
+
+
+### UADDLB ###
+
+Unsigned add long (bottom).
+
+ void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UADDLT ###
+
+Unsigned add long (top).
+
+ void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UADDV ###
+
+Unsigned add reduction to scalar.
+
+ void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn)
+
+
+### UADDWB ###
+
+Unsigned add wide (bottom).
+
+ void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UADDWT ###
+
+Unsigned add wide (top).
+
+ void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UCVTF ###
+
+Unsigned integer convert to floating-point (predicated).
+
+ void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### UDIV ###
+
+Unsigned divide (predicated).
+
+ void udiv(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UDIVR ###
+
+Unsigned reversed divide (predicated).
+
+ void udivr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UDOT ###
+
+Unsigned dot product by indexed quadtuplet.
+
+ void udot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### UDOT ###
+
+Unsigned dot product.
+
+ void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UHADD ###
+
+Unsigned halving addition.
+
+ void uhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UHSUB ###
+
+Unsigned halving subtract.
+
+ void uhsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UHSUBR ###
+
+Unsigned halving subtract reversed vectors.
+
+ void uhsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UMAX ###
+
+Unsigned maximum vectors (predicated).
+
+ void umax(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UMAX ###
+
+Unsigned maximum with immediate (unpredicated).
+
+ void umax(const ZRegister& zd, const ZRegister& zn, int imm8)
+
+
+### UMAXP ###
+
+Unsigned maximum pairwise.
+
+ void umaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UMAXV ###
+
+Unsigned maximum reduction to scalar.
+
+ void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### UMIN ###
+
+Unsigned minimum vectors (predicated).
+
+ void umin(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UMIN ###
+
+Unsigned minimum with immediate (unpredicated).
+
+ void umin(const ZRegister& zd, const ZRegister& zn, int imm8)
+
+
+### UMINP ###
+
+Unsigned minimum pairwise.
+
+ void uminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UMINV ###
+
+Unsigned minimum reduction to scalar.
+
+ void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn)
+
+
+### UMLALB ###
+
+Unsigned multiply-add long to accumulator (bottom).
+
+ void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMLALB ###
+
+Unsigned multiply-add long to accumulator (bottom, indexed).
+
+ void umlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### UMLALT ###
+
+Unsigned multiply-add long to accumulator (top).
+
+ void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMLALT ###
+
+Unsigned multiply-add long to accumulator (top, indexed).
+
+ void umlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### UMLSLB ###
+
+Unsigned multiply-subtract long from accumulator (bottom).
+
+ void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMLSLB ###
+
+Unsigned multiply-subtract long from accumulator (bottom, indexed).
+
+ void umlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### UMLSLT ###
+
+Unsigned multiply-subtract long from accumulator (top).
+
+ void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMLSLT ###
+
+Unsigned multiply-subtract long from accumulator (top, indexed).
+
+ void umlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### UMMLA ###
+
+Unsigned integer matrix multiply-accumulate.
+
+ void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMULH ###
+
+Unsigned multiply returning high half (predicated).
+
+ void umulh(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UMULH ###
+
+Unsigned multiply returning high half (unpredicated).
+
+ void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMULLB ###
+
+Unsigned multiply long (bottom).
+
+ void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMULLB ###
+
+Unsigned multiply long (bottom, indexed).
+
+ void umullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### UMULLT ###
+
+Unsigned multiply long (top).
+
+ void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UMULLT ###
+
+Unsigned multiply long (top, indexed).
+
+ void umullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### UQADD ###
+
+Unsigned saturating add immediate (unpredicated).
+
+ void uqadd(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1)
+
+
+### UQADD ###
+
+Unsigned saturating add vectors (unpredicated).
+
+ void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UQADD ###
+
+Unsigned saturating addition (predicated).
+
+ void uqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UQDECB ###
+
+Unsigned saturating decrement scalar by multiple of 8-bit predicate constraint element count.
+
+ void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQDECD ###
+
+Unsigned saturating decrement scalar by multiple of 64-bit predicate constraint element count.
+
+ void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQDECD ###
+
+Unsigned saturating decrement vector by multiple of 64-bit predicate constraint element count.
+
+ void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQDECH ###
+
+Unsigned saturating decrement scalar by multiple of 16-bit predicate constraint element count.
+
+ void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQDECH ###
+
+Unsigned saturating decrement vector by multiple of 16-bit predicate constraint element count.
+
+ void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQDECP ###
+
+Unsigned saturating decrement scalar by active predicate element count.
+
+ void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg)
+
+
+### UQDECP ###
+
+Unsigned saturating decrement vector by active predicate element count.
+
+ void uqdecp(const ZRegister& zdn, const PRegister& pg)
+
+
+### UQDECW ###
+
+Unsigned saturating decrement scalar by multiple of 32-bit predicate constraint element count.
+
+ void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQDECW ###
+
+Unsigned saturating decrement vector by multiple of 32-bit predicate constraint element count.
+
+ void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQINCB ###
+
+Unsigned saturating increment scalar by multiple of 8-bit predicate constraint element count.
+
+ void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQINCD ###
+
+Unsigned saturating increment scalar by multiple of 64-bit predicate constraint element count.
+
+ void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQINCD ###
+
+Unsigned saturating increment vector by multiple of 64-bit predicate constraint element count.
+
+ void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQINCH ###
+
+Unsigned saturating increment scalar by multiple of 16-bit predicate constraint element count.
+
+ void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQINCH ###
+
+Unsigned saturating increment vector by multiple of 16-bit predicate constraint element count.
+
+ void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQINCP ###
+
+Unsigned saturating increment scalar by active predicate element count.
+
+ void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg)
+
+
+### UQINCP ###
+
+Unsigned saturating increment vector by active predicate element count.
+
+ void uqincp(const ZRegister& zdn, const PRegister& pg)
+
+
+### UQINCW ###
+
+Unsigned saturating increment scalar by multiple of 32-bit predicate constraint element count.
+
+ void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQINCW ###
+
+Unsigned saturating increment vector by multiple of 32-bit predicate constraint element count.
+
+ void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1)
+
+
+### UQRSHL ###
+
+Unsigned saturating rounding shift left by vector (predicated).
+
+ void uqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UQRSHLR ###
+
+Unsigned saturating rounding shift left reversed vectors (predicated).
+
+ void uqrshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UQRSHRNB ###
+
+Unsigned saturating rounding shift right narrow by immediate (bottom).
+
+ void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### UQRSHRNT ###
+
+Unsigned saturating rounding shift right narrow by immediate (top).
+
+ void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### UQSHL ###
+
+Unsigned saturating shift left by immediate.
+
+ void uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### UQSHL ###
+
+Unsigned saturating shift left by vector (predicated).
+
+ void uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UQSHLR ###
+
+Unsigned saturating shift left reversed vectors (predicated).
+
+ void uqshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UQSHRNB ###
+
+Unsigned saturating shift right narrow by immediate (bottom).
+
+ void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### UQSHRNT ###
+
+Unsigned saturating shift right narrow by immediate (top).
+
+ void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### UQSUB ###
+
+Unsigned saturating subtract immediate (unpredicated).
+
+ void uqsub(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm8,
+ int shift = -1)
+
+
+### UQSUB ###
+
+Unsigned saturating subtract vectors (unpredicated).
+
+ void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UQSUB ###
+
+Unsigned saturating subtraction (predicated).
+
+ void uqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UQSUBR ###
+
+Unsigned saturating subtraction reversed vectors (predicated).
+
+ void uqsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### UQXTNB ###
+
+Unsigned saturating extract narrow (bottom).
+
+ void uqxtnb(const ZRegister& zd, const ZRegister& zn)
+
+
+### UQXTNT ###
+
+Unsigned saturating extract narrow (top).
+
+ void uqxtnt(const ZRegister& zd, const ZRegister& zn)
+
+
+### URECPE ###
+
+Unsigned reciprocal estimate (predicated).
+
+ void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### URHADD ###
+
+Unsigned rounding halving addition.
+
+ void urhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### URSHL ###
+
+Unsigned rounding shift left by vector (predicated).
+
+ void urshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### URSHLR ###
+
+Unsigned rounding shift left reversed vectors (predicated).
+
+ void urshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### URSHR ###
+
+Unsigned rounding shift right by immediate.
+
+ void urshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift)
+
+
+### URSQRTE ###
+
+Unsigned reciprocal square root estimate (predicated).
+
+ void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### URSRA ###
+
+Unsigned rounding shift right and accumulate (immediate).
+
+ void ursra(const ZRegister& zda, const ZRegister& zn, int shift)
+
+
+### USDOT ###
+
+Unsigned by signed integer dot product.
+
+ void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### USDOT ###
+
+Unsigned by signed integer indexed dot product.
+
+ void usdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index)
+
+
+### USHLLB ###
+
+Unsigned shift left long by immediate (bottom).
+
+ void ushllb(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### USHLLT ###
+
+Unsigned shift left long by immediate (top).
+
+ void ushllt(const ZRegister& zd, const ZRegister& zn, int shift)
+
+
+### USMMLA ###
+
+Unsigned by signed integer matrix multiply-accumulate.
+
+ void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm)
+
+
+### USQADD ###
+
+Unsigned saturating addition of signed value.
+
+ void usqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm)
+
+
+### USRA ###
+
+Unsigned shift right and accumulate (immediate).
+
+ void usra(const ZRegister& zda, const ZRegister& zn, int shift)
+
+
+### USUBLB ###
+
+Unsigned subtract long (bottom).
+
+ void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### USUBLT ###
+
+Unsigned subtract long (top).
+
+ void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### USUBWB ###
+
+Unsigned subtract wide (bottom).
+
+ void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### USUBWT ###
+
+Unsigned subtract wide (top).
+
+ void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UUNPKHI ###
+
+Unsigned unpack and extend half of vector.
+
+ void uunpkhi(const ZRegister& zd, const ZRegister& zn)
+
+
+### UUNPKLO ###
+
+Unsigned unpack and extend half of vector.
+
+ void uunpklo(const ZRegister& zd, const ZRegister& zn)
+
+
+### UXTB ###
+
+Unsigned byte extend (predicated).
+
+ void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### UXTH ###
+
+Unsigned halfword extend (predicated).
+
+ void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### UXTW ###
+
+Unsigned word extend (predicated).
+
+ void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn)
+
+
+### UZP1 ###
+
+Concatenate even or odd elements from two predicates.
+
+ void uzp1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### UZP1 ###
+
+Concatenate even or odd elements from two vectors.
+
+ void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### UZP2 ###
+
+Concatenate even or odd elements from two predicates.
+
+ void uzp2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### UZP2 ###
+
+Concatenate even or odd elements from two vectors.
+
+ void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+<a id="sve-w">
+
+### WHILEGE ###
+
+While decrementing signed scalar greater than or equal to scalar.
+
+ void whilege(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILEGT ###
+
+While decrementing signed scalar greater than scalar.
+
+ void whilegt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILEHI ###
+
+While decrementing unsigned scalar higher than scalar.
+
+ void whilehi(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILEHS ###
+
+While decrementing unsigned scalar higher or same as scalar.
+
+ void whilehs(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILELE ###
+
+While incrementing signed scalar less than or equal to scalar.
+
+ void whilele(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILELO ###
+
+While incrementing unsigned scalar lower than scalar.
+
+ void whilelo(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILELS ###
+
+While incrementing unsigned scalar lower or same as scalar.
+
+ void whilels(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILELT ###
+
+While incrementing signed scalar less than scalar.
+
+ void whilelt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILERW ###
+
+While free of read-after-write conflicts.
+
+ void whilerw(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WHILEWR ###
+
+While free of write-after-read/write conflicts.
+
+ void whilewr(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm)
+
+
+### WRFFR ###
+
+Write the first-fault register.
+
+ void wrffr(const PRegisterWithLaneSize& pn)
+
+
+<a id="sve-x">
+
+### XAR ###
+
+Bitwise exclusive OR and rotate right by immediate.
+
+ void xar(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int shift)
+
+
+<a id="sve-z">
+
+### ZIP1 ###
+
+Interleave elements from two half predicates.
+
+ void zip1(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### ZIP1 ###
+
+Interleave elements from two half vectors.
+
+ void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+### ZIP2 ###
+
+Interleave elements from two half predicates.
+
+ void zip2(const PRegisterWithLaneSize& pd,
+ const PRegisterWithLaneSize& pn,
+ const PRegisterWithLaneSize& pm)
+
+
+### ZIP2 ###
+
+Interleave elements from two half vectors.
+
+ void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm)
+
+
+
Additional or pseudo instructions
---------------------------------
+<a id="pseudo-b">
+
### BIND ###
Bind a label to the current PC.
@@ -6204,6 +12403,8 @@ Bind a label to the current PC.
void bind(Label* label)
+<a id="pseudo-d">
+
### DC ###
Emit data in the instruction stream.
@@ -6232,6 +12433,8 @@ Emit raw instructions into the instruction stream.
void dci(Instr raw_inst)
+<a id="pseudo-p">
+
### PLACE ###
Place a literal at the current PC.
diff --git a/doc/changelog.md b/doc/changelog.md
deleted file mode 100644
index 70d0755b..00000000
--- a/doc/changelog.md
+++ /dev/null
@@ -1,124 +0,0 @@
-VIXL Change Log
-===============
-
-* 1.13
- + Improve code formatting and add tests using clang-format.
- + Fix bugs in disassembly of unallocated instruction encodings.
- + Fix some execution trace bugs, and add tests.
- + Other small bug fixes and improvements.
-
-* 1.12
- + Bug fixes for toolchain compatibility.
-
-* 1.11
- + Fix bug in simulation of add with carry.
- + Fix use-after-free bug in Literal handling.
- + Build system updates for Android.
- + Add option to run test.py under Valgrind.
- + Other small bug fixes and improvements.
-
-* 1.10
- + Improved support for externally managed literals.
- + Reworked build and test infrastructure.
- + Other small bug fixes and improvements.
-
-* 1.9
- + Improved compatibility with Android build system.
- + Improved compatibility with Clang toolchain.
- + Added support for `umulh` instruction.
- + Added support for `fcmpe` and `fccmpe` instructions.
- + Other small bug fixes and improvements.
-
-* 1.8
- + Complete NEON instruction set support.
- + Support long branches using veneers.
- + Improved handling of literal pools.
- + Support some `ic` and `dc` cache op instructions.
- + Support CRC32 instructions.
- + Support half-precision floating point instructions.
- + MacroAssembler support for `bfm`, `ubfm` and `sbfm`.
- + Other small bug fixes and improvements.
-
-* 1.7
- + Added support for `prfm` prefetch instructions.
- + Added support for all `frint` instruction variants.
- + Add support for disassembling as an offset from a given address.
- + Fixed the disassembly of `movz` and `movn`.
- + Provide static helpers for immediate generation.
- + Provide helpers to create CPURegList from list unions or intersections.
- + Improved register value tracing.
- + Multithreading test fixes.
- + Other small bug fixes and build system improvements.
-
-* 1.6
- + Make literal pool management the responsibility of the macro assembler.
- + Move code buffer management out of the Assembler.
- + Support `ldrsw` for literals.
- + Support binding a label to a specific offset.
- + Add macro assembler support for load/store pair with arbitrary offset.
- + Support Peek and Poke for CPURegLists.
- + Fix disassembly of branch targets.
- + Fix Decoder visitor insertion order.
- + Separate Decoder visitors into const and non-const variants.
- + Fix simulator for branches to tagged addresses.
- + Add a VIM YouCompleteMe configuration file.
- + Other small bug fixes and build system improvements.
-
-* 1.5
- + Tagged pointer support.
- + Implement support for exclusive access instructions.
- + Implement support for `adrp` instruction.
- + Faster code for logical immediate identification.
- + Generate better code for immediates passed to shift-capable instructions.
- + Allow explicit use of unscaled-offset loads and stores.
- + Build and test infrastructure improvements.
- + Corrected computation of cache line size.
- + Fix simulation of `extr` instruction.
- + Fixed a bug when moving kWMinInt to a register.
- + Other small bug fixes.
-
-* 1.4
- + Added support for `frintm`.
- + Fixed simulation of `frintn` and `frinta` for corner cases.
- + Added more tests for floating point instruction simulation.
- + Modified `CalleeSave()` and `CalleeRestore()` to push general purpose
- registers before floating point registers on the stack.
- + Fixed Printf for mixed argument types, and use on real hardware.
- + Improved compatibility with some 32-bit compilers.
-
-* 1.3
- + Address inaccuracies in the simulated floating point instructions.
- + Implement Default-NaN floating point mode.
- + Introduce `UseScratchRegisterScope` for controlling the use of temporary
- registers.
- + Enable building VIXL on 32-bit hosts.
- + Other small bug fixes and improvements.
-
-* 1.2
- + Added support for `fmadd`, `fnmadd`, `fnmsub`, `fminnm`, `fmaxnm`,
- `frinta`, `fcvtau` and `fcvtas`.
- + Added support for assembling and disassembling `isb`, `dsb` and `dmb`.
- + Added support for automatic inversion of compare instructions when using
- negative immediates.
- + Added support for using `movn` when generating immediates.
- + Added explicit flag-setting 'S' instructions, and removed
- `SetFlags` and `LeaveFlags` arguments.
- + Added support for `Movk` in macro assembler.
- + Added support for W register parameters to `Tbz` and `Tbnz`.
- + Added support for using immediate operands with `Csel`.
- + Added new debugger syntax for memory inspection.
- + Fixed `smull`, `fmsub` and `sdiv` simulation.
- + Fixed sign extension for W->X conversions using `sxtb`, `sxth` and `sxtw`.
- + Prevented code generation for certain side-effect free operations,
- such as `add r, r, #0`, in the macro assembler.
- + Other small bug fixes.
-
-* 1.1
- + Improved robustness of instruction decoder and disassembler.
- + Added support for double-to-float conversions using `fcvt`.
- + Added support for more fixed-point to floating-point conversions (`ucvtf`
- and `scvtf`).
- + Added instruction statistics collection class `instrument-a64.cc`.
-
-* 1.0
- + Initial release.
diff --git a/examples/aarch32/custom-aarch32-disasm.cc b/examples/aarch32/custom-aarch32-disasm.cc
index e4df9ff3..9577ec78 100644
--- a/examples/aarch32/custom-aarch32-disasm.cc
+++ b/examples/aarch32/custom-aarch32-disasm.cc
@@ -73,11 +73,11 @@ class CustomDisassembler : public PrintDisassembler {
CustomStream* GetStream() const {
return reinterpret_cast<CustomStream*>(&os());
}
- virtual void PrintCodeAddress(uint32_t pc) VIXL_OVERRIDE {
+ virtual void PrintCodeAddress(uint32_t addr) VIXL_OVERRIDE {
// If the address matches a label, then print the label. Otherwise, print
// nothing.
std::map<Location::Offset, const char*>::iterator symbol =
- GetStream()->GetSymbols().find(pc);
+ GetStream()->GetSymbols().find(addr);
if (symbol != GetStream()->GetSymbols().end()) {
os().os() << symbol->second << ":" << std::endl;
}
diff --git a/examples/aarch64/custom-disassembler.cc b/examples/aarch64/custom-disassembler.cc
index 97c94c80..9ea6aacf 100644
--- a/examples/aarch64/custom-disassembler.cc
+++ b/examples/aarch64/custom-disassembler.cc
@@ -24,6 +24,8 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#include <regex>
+
#include "custom-disassembler.h"
#include "examples.h"
@@ -102,13 +104,20 @@ void CustomDisassembler::AppendCodeRelativeCodeAddressToOutput(
}
-// We override this method to add a comment to this type of instruction. Helpers
-// from the vixl::Instruction class can be used to analyse the instruction being
+// We override this method to add a comment to some instructions. Helpers from
+// the vixl::Instruction class can be used to analyse the instruction being
 // disassembled.
-void CustomDisassembler::VisitAddSubShifted(const Instruction* instr) {
- vixl::aarch64::Disassembler::VisitAddSubShifted(instr);
- if (instr->GetRd() == 10) {
- AppendToOutput(" // add/sub to x10");
+void CustomDisassembler::Visit(Metadata* metadata, const Instruction* instr) {
+ vixl::aarch64::Disassembler::Visit(metadata, instr);
+ const std::string& form = (*metadata)["form"];
+
+ // Match the forms for 32/64-bit add/subtract with shift, with optional flag
+ // setting.
+ if (std::regex_match(form, // NOLINT: avoid clang-tidy-4.0 errors.
+ std::regex("(?:add|sub)s?_(?:32|64)_addsub_shift"))) {
+ if (instr->GetRd() == 10) {
+ AppendToOutput(" // add/sub to x10");
+ }
}
ProcessOutput(instr);
}
diff --git a/examples/aarch64/custom-disassembler.h b/examples/aarch64/custom-disassembler.h
index cfff489f..261a7853 100644
--- a/examples/aarch64/custom-disassembler.h
+++ b/examples/aarch64/custom-disassembler.h
@@ -40,8 +40,8 @@ class CustomDisassembler : public vixl::aarch64::Disassembler {
CustomDisassembler() : vixl::aarch64::Disassembler() {}
virtual ~CustomDisassembler() {}
- virtual void VisitAddSubShifted(const vixl::aarch64::Instruction* instr)
- VIXL_OVERRIDE;
+ virtual void Visit(vixl::aarch64::Metadata* metadata,
+ const vixl::aarch64::Instruction* instr) VIXL_OVERRIDE;
protected:
virtual void AppendRegisterNameToOutput(
diff --git a/examples/aarch64/executable-memory.h b/examples/aarch64/executable-memory.h
new file mode 100644
index 00000000..8a9ef1eb
--- /dev/null
+++ b/examples/aarch64/executable-memory.h
@@ -0,0 +1,88 @@
+// Copyright 2020, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef VIXL_EXAMPLE_EXECUTABLE_MEMORY_H_
+#define VIXL_EXAMPLE_EXECUTABLE_MEMORY_H_
+
+extern "C" {
+#include <stdint.h>
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+#include <sys/mman.h>
+#endif
+}
+
+#include <cstdio>
+#include <string>
+
+#include "aarch64/assembler-aarch64.h"
+#include "aarch64/constants-aarch64.h"
+#include "aarch64/cpu-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+class ExecutableMemory {
+ public:
+ ExecutableMemory(const vixl::byte* code_start, size_t size)
+ : size_(size),
+ buffer_(reinterpret_cast<vixl::byte*>(mmap(NULL,
+ size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ -1,
+ 0))) {
+ VIXL_CHECK(buffer_ != MAP_FAILED);
+ memcpy(buffer_, code_start, size_);
+
+ vixl::aarch64::CPU::EnsureIAndDCacheCoherency(buffer_, size_);
+ int res = mprotect(buffer_, size_, PROT_READ | PROT_EXEC);
+ VIXL_CHECK(res == 0);
+ }
+ ~ExecutableMemory() { munmap(buffer_, size_); }
+
+ template <typename T>
+ T GetEntryPoint(const vixl::aarch64::Label& entry_point) const {
+ int64_t location = entry_point.GetLocation();
+ return GetOffsetAddress<T>(location);
+ }
+
+ private:
+ template <typename T>
+ T GetOffsetAddress(int64_t offset) const {
+ VIXL_ASSERT((offset >= 0) && (static_cast<size_t>(offset) <= size_));
+ T function_address;
+ vixl::byte* buffer_address = buffer_ + offset;
+
+ VIXL_STATIC_ASSERT(sizeof(T) == sizeof(buffer_address));
+ memcpy(&function_address, &buffer_address, sizeof(T));
+ return function_address;
+ }
+
+ size_t size_;
+ vixl::byte* buffer_;
+};
+#endif
+
+#endif // VIXL_EXAMPLE_EXECUTABLE_MEMORY_H_
diff --git a/examples/aarch64/getting-started.cc b/examples/aarch64/getting-started.cc
index c5da4c8a..a0834989 100644
--- a/examples/aarch64/getting-started.cc
+++ b/examples/aarch64/getting-started.cc
@@ -27,6 +27,8 @@
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
+#include "executable-memory.h"
+
using namespace vixl;
using namespace vixl::aarch64;
@@ -43,25 +45,34 @@ void GenerateDemoFunction(MacroAssembler *masm) {
#ifndef TEST_EXAMPLES
-#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
int main() {
MacroAssembler masm;
- Decoder decoder;
- Simulator simulator(&decoder);
- Label demo_function;
- masm.Bind(&demo_function);
+ Label demo;
+ masm.Bind(&demo);
GenerateDemoFunction(&masm);
masm.FinalizeCode();
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+ Decoder decoder;
+ Simulator simulator(&decoder);
+
simulator.WriteXRegister(0, 0x8899aabbccddeeff);
- simulator.RunFrom(masm.GetLabelAddress<Instruction *>(&demo_function));
+ simulator.RunFrom(masm.GetLabelAddress<Instruction *>(&demo));
printf("x0 = %" PRIx64 "\n", simulator.ReadXRegister(0));
- return 0;
-}
#else
-// Without the simulator there is nothing to test.
-int main(void) { return 0; }
+ byte* code = masm.GetBuffer()->GetStartAddress<byte*>();
+ size_t code_size = masm.GetSizeOfCodeGenerated();
+ ExecutableMemory memory(code, code_size);
+ // Run the example function.
+ uint64_t (*demo_function)(uint64_t) =
+ memory.GetEntryPoint<uint64_t (*)(uint64_t)>(demo);
+ uint64_t input_value = 0x8899aabbccddeeff;
+ uint64_t output_value = (*demo_function)(input_value);
+ printf("native: demo(0x%016lx) = 0x%016lx\n", input_value, output_value);
#endif // VIXL_INCLUDE_SIMULATOR_AARCH64
+
+ return 0;
+}
#endif // TEST_EXAMPLES
diff --git a/examples/aarch64/non-const-visitor.cc b/examples/aarch64/non-const-visitor.cc
index d4c54fb1..307b618f 100644
--- a/examples/aarch64/non-const-visitor.cc
+++ b/examples/aarch64/non-const-visitor.cc
@@ -24,8 +24,10 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include "non-const-visitor.h"
+#include <regex>
+
#include "examples.h"
+#include "non-const-visitor.h"
using namespace vixl;
using namespace vixl::aarch64;
@@ -33,20 +35,28 @@ using namespace vixl::aarch64;
#define __ masm->
-void SwitchAddSubRegisterSources::VisitAddSubShifted(const Instruction* instr) {
- int rn = instr->GetRn();
- int rm = instr->GetRm();
- // Only non-const visitors are allowed to discard constness of the visited
- // instruction.
- Instruction* mutable_instr = MutableInstruction(instr);
- Instr instr_bits = mutable_instr->GetInstructionBits();
+void SwitchAddSubRegisterSources::Visit(Metadata* metadata,
+ const Instruction* instr) {
+ const std::string& form = (*metadata)["form"];
- // Switch the bitfields for the `rn` and `rm` registers.
- instr_bits &= ~(Rn_mask | Rm_mask);
- instr_bits |= (rn << Rm_offset) | (rm << Rn_offset);
+ // Match the forms for 32/64-bit add/subtract with shift, with optional flag
+ // setting.
+ if (std::regex_match(form, // NOLINT: avoid clang-tidy-4.0 errors.
+ std::regex("(?:add|sub)s?_(?:32|64)_addsub_shift"))) {
+ int rn = instr->GetRn();
+ int rm = instr->GetRm();
+ // Only non-const visitors are allowed to discard constness of the visited
+ // instruction.
+ Instruction* mutable_instr = MutableInstruction(instr);
+ Instr instr_bits = mutable_instr->GetInstructionBits();
- // Rewrite the instruction.
- mutable_instr->SetInstructionBits(instr_bits);
+ // Switch the bitfields for the `rn` and `rm` registers.
+ instr_bits &= ~(Rn_mask | Rm_mask);
+ instr_bits |= (rn << Rm_offset) | (rm << Rn_offset);
+
+ // Rewrite the instruction.
+ mutable_instr->SetInstructionBits(instr_bits);
+ }
}
diff --git a/examples/aarch64/non-const-visitor.h b/examples/aarch64/non-const-visitor.h
index 243cc156..b7c50797 100644
--- a/examples/aarch64/non-const-visitor.h
+++ b/examples/aarch64/non-const-visitor.h
@@ -30,17 +30,16 @@
#include "aarch64/decoder-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
-class SwitchAddSubRegisterSources
- : public vixl::aarch64::DecoderVisitorWithDefaults {
+class SwitchAddSubRegisterSources : public vixl::aarch64::DecoderVisitor {
public:
SwitchAddSubRegisterSources()
- : vixl::aarch64::DecoderVisitorWithDefaults(kNonConstVisitor) {}
+ : vixl::aarch64::DecoderVisitor(kNonConstVisitor) {}
// Our visitor switches the register sources for some add and sub instructions
// (not all add and sub instructions).
- virtual void VisitAddSubShifted(const vixl::aarch64::Instruction* instr)
- VIXL_OVERRIDE;
+ virtual void Visit(vixl::aarch64::Metadata* metadata,
+ const vixl::aarch64::Instruction* instr) VIXL_OVERRIDE;
};
diff --git a/src/aarch32/assembler-aarch32.cc b/src/aarch32/assembler-aarch32.cc
index 5f636981..64126664 100644
--- a/src/aarch32/assembler-aarch32.cc
+++ b/src/aarch32/assembler-aarch32.cc
@@ -2557,13 +2557,13 @@ void Assembler::adr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= 0) && (offset <= 1020) &&
- ((offset & 0x3) == 0));
- const int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0));
+ const int32_t target = off >> 2;
return instr | (target & 0xff);
}
} immop;
@@ -2588,15 +2588,16 @@ void Assembler::adr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
int32_t target;
- if ((offset >= 0) && (offset <= 4095)) {
- target = offset;
+ if ((off >= 0) && (off <= 4095)) {
+ target = off;
} else {
- target = -offset;
+ target = -off;
VIXL_ASSERT((target >= 0) && (target <= 4095));
// Emit the T2 encoding.
instr |= 0x00a00000;
@@ -2622,19 +2623,20 @@ void Assembler::adr(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
int32_t target;
- ImmediateA32 positive_immediate_a32(offset);
- if (positive_immediate_a32.IsValid()) {
- target = positive_immediate_a32.GetEncodingValue();
+ ImmediateA32 pos_imm_a32(off);
+ if (pos_imm_a32.IsValid()) {
+ target = pos_imm_a32.GetEncodingValue();
} else {
- ImmediateA32 negative_immediate_a32(-offset);
- VIXL_ASSERT(negative_immediate_a32.IsValid());
+ ImmediateA32 neg_imm_a32(-off);
+ VIXL_ASSERT(neg_imm_a32.IsValid());
// Emit the A2 encoding.
- target = negative_immediate_a32.GetEncodingValue();
+ target = neg_imm_a32.GetEncodingValue();
instr = (instr & ~0x00f00000) | 0x00400000;
}
return instr | (target & 0xfff);
@@ -3024,13 +3026,12 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= -256) && (offset <= 254) &&
- ((offset & 0x1) == 0));
- const int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= -256) && (off <= 254) && ((off & 0x1) == 0));
+ const int32_t target = off >> 1;
return instr | (target & 0xff);
}
} immop;
@@ -3051,13 +3052,12 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= -2048) && (offset <= 2046) &&
- ((offset & 0x1) == 0));
- const int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= -2048) && (off <= 2046) && ((off & 0x1) == 0));
+ const int32_t target = off >> 1;
return instr | (target & 0x7ff);
}
} immop;
@@ -3075,13 +3075,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= -1048576) && (offset <= 1048574) &&
- ((offset & 0x1) == 0));
- const int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= -1048576) && (off <= 1048574) &&
+ ((off & 0x1) == 0));
+ const int32_t target = off >> 1;
return instr | (target & 0x7ff) | ((target & 0x1f800) << 5) |
((target & 0x20000) >> 4) | ((target & 0x40000) >> 7) |
((target & 0x80000) << 7);
@@ -3104,13 +3104,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) &&
- ((offset & 0x1) == 0));
- int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= -16777216) && (off <= 16777214) &&
+ ((off & 0x1) == 0));
+ int32_t target = off >> 1;
uint32_t S = target & (1 << 23);
target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21);
return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) |
@@ -3132,13 +3132,13 @@ void Assembler::b(Condition cond, EncodingSize size, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) &&
- ((offset & 0x3) == 0));
- const int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= -33554432) && (off <= 33554428) &&
+ ((off & 0x3) == 0));
+ const int32_t target = off >> 2;
return instr | (target & 0xffffff);
}
} immop;
@@ -3462,13 +3462,13 @@ void Assembler::bl(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= -16777216) && (offset <= 16777214) &&
- ((offset & 0x1) == 0));
- int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= -16777216) && (off <= 16777214) &&
+ ((off & 0x1) == 0));
+ int32_t target = off >> 1;
uint32_t S = target & (1 << 23);
target ^= ((S >> 1) | (S >> 2)) ^ (3 << 21);
return instr | (target & 0x7ff) | ((target & 0x1ff800) << 5) |
@@ -3490,13 +3490,13 @@ void Assembler::bl(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= -33554432) && (offset <= 33554428) &&
- ((offset & 0x3) == 0));
- const int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= -33554432) && (off <= 33554428) &&
+ ((off & 0x3) == 0));
+ const int32_t target = off >> 2;
return instr | (target & 0xffffff);
}
} immop;
@@ -3549,13 +3549,14 @@ void Assembler::blx(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -16777216) && (offset <= 16777212) &&
- ((offset & 0x3) == 0));
- int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -16777216) && (off <= 16777212) &&
+ ((off & 0x3) == 0));
+ int32_t target = off >> 2;
uint32_t S = target & (1 << 22);
target ^= ((S >> 1) | (S >> 2)) ^ (3 << 20);
return instr | ((target & 0x3ff) << 1) | ((target & 0xffc00) << 6) |
@@ -3577,15 +3578,14 @@ void Assembler::blx(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const
- VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset =
- location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -33554432) && (offset <= 33554430) &&
- ((offset & 0x1) == 0));
- const int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -33554432) && (off <= 33554430) &&
+ ((off & 0x1) == 0));
+ const int32_t target = off >> 1;
return instr | ((target & 0x1) << 24) | ((target & 0x1fffffe) >> 1);
}
} immop;
@@ -3698,13 +3698,12 @@ void Assembler::cbnz(Register rn, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= 0) && (offset <= 126) &&
- ((offset & 0x1) == 0));
- const int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0));
+ const int32_t target = off >> 1;
return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4);
}
} immop;
@@ -3748,13 +3747,12 @@ void Assembler::cbz(Register rn, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - pc;
- VIXL_ASSERT((offset >= 0) && (offset <= 126) &&
- ((offset & 0x1) == 0));
- const int32_t target = offset >> 1;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off = loc->GetLocation() - program_counter;
+ VIXL_ASSERT((off >= 0) && (off <= 126) && ((off & 0x1) == 0));
+ const int32_t target = off >> 1;
return instr | ((target & 0x1f) << 3) | ((target & 0x20) << 4);
}
} immop;
@@ -4790,7 +4788,7 @@ void Assembler::ldm(Condition cond,
}
// LDM{<c>}{<q>} SP!, <registers> ; T1
if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() &&
- ((registers.GetList() & ~0x80ff) == 0)) {
+ registers.IsR0toR7orPC()) {
EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
GetRegisterListEncoding(registers, 0, 8));
AdvanceIT();
@@ -5208,13 +5206,13 @@ void Assembler::ldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= 0) && (offset <= 1020) &&
- ((offset & 0x3) == 0));
- const int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= 0) && (off <= 1020) && ((off & 0x3) == 0));
+ const int32_t target = off >> 2;
return instr | (target & 0xff);
}
} immop;
@@ -5233,13 +5231,14 @@ void Assembler::ldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5259,13 +5258,14 @@ void Assembler::ldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5505,13 +5505,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5531,13 +5532,14 @@ void Assembler::ldrb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -5747,13 +5749,13 @@ void Assembler::ldrd(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
- ((offset & 0x3) == 0));
- int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+ int32_t target = off >> 2;
uint32_t U = (target >= 0);
target = abs(target) | (U << 8);
return instr | (target & 0xff) | ((target & 0x100) << 15);
@@ -5777,13 +5779,14 @@ void Assembler::ldrd(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -255) && (offset <= 255));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 8);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -255) && (off <= 255));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -6129,13 +6132,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -6155,13 +6159,14 @@ void Assembler::ldrh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -255) && (offset <= 255));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 8);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -255) && (off <= 255));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -6382,13 +6387,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -6408,13 +6414,14 @@ void Assembler::ldrsb(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -255) && (offset <= 255));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 8);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -255) && (off <= 255));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -6635,13 +6642,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -6661,13 +6669,14 @@ void Assembler::ldrsh(Condition cond, Register rt, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -255) && (offset <= 255));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 8);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -255) && (off <= 255));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 8);
return instr | (target & 0xf) | ((target & 0xf0) << 4) |
((target & 0x100) << 15);
}
@@ -8039,13 +8048,14 @@ void Assembler::pld(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -8062,15 +8072,14 @@ void Assembler::pld(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const
- VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset =
- location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -8403,13 +8412,14 @@ void Assembler::pli(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -8426,15 +8436,14 @@ void Assembler::pli(Condition cond, Location* location) {
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const
- VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset =
- location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -4095) && (offset <= 4095));
- uint32_t U = (offset >= 0);
- int32_t target = abs(offset) | (U << 12);
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -4095) && (off <= 4095));
+ uint32_t U = (off >= 0);
+ int32_t target = abs(off) | (U << 12);
return instr | (target & 0xfff) | ((target & 0x1000) << 11);
}
} immop;
@@ -8471,29 +8480,39 @@ bool Assembler::pli_info(Condition cond,
void Assembler::pop(Condition cond, EncodingSize size, RegisterList registers) {
VIXL_ASSERT(AllowAssembler());
CheckIT(cond);
- if (IsUsingT32()) {
- // POP{<c>}{<q>} <registers> ; T1
- if (!size.IsWide() && ((registers.GetList() & ~0x80ff) == 0)) {
- EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
- GetRegisterListEncoding(registers, 0, 8));
- AdvanceIT();
- return;
- }
- // POP{<c>}{<q>} <registers> ; T2
- if (!size.IsNarrow() && ((registers.GetList() & ~0xdfff) == 0)) {
- EmitT32_32(0xe8bd0000U |
- (GetRegisterListEncoding(registers, 15, 1) << 15) |
- (GetRegisterListEncoding(registers, 14, 1) << 14) |
- GetRegisterListEncoding(registers, 0, 13));
- AdvanceIT();
- return;
- }
- } else {
- // POP{<c>}{<q>} <registers> ; A1
- if (cond.IsNotNever()) {
- EmitA32(0x08bd0000U | (cond.GetCondition() << 28) |
- GetRegisterListEncoding(registers, 0, 16));
- return;
+ if (!registers.IsEmpty() || AllowUnpredictable()) {
+ if (IsUsingT32()) {
+ // A branch out of an IT block should be the last instruction in the
+ // block.
+ if (!registers.Includes(pc) || OutsideITBlockAndAlOrLast(cond) ||
+ AllowUnpredictable()) {
+ // POP{<c>}{<q>} <registers> ; T1
+ if (!size.IsWide() && registers.IsR0toR7orPC()) {
+ EmitT32_16(0xbc00 | (GetRegisterListEncoding(registers, 15, 1) << 8) |
+ GetRegisterListEncoding(registers, 0, 8));
+ AdvanceIT();
+ return;
+ }
+ // POP{<c>}{<q>} <registers> ; T2
+ // Alias of: LDM{<c>}{<q>} SP!, <registers> ; T2
+ if (!size.IsNarrow() &&
+ ((!registers.Includes(sp) && (registers.GetCount() > 1) &&
+ !(registers.Includes(pc) && registers.Includes(lr))) ||
+ AllowUnpredictable())) {
+ EmitT32_32(0xe8bd0000U | GetRegisterListEncoding(registers, 0, 16));
+ AdvanceIT();
+ return;
+ }
+ }
+ } else {
+ // POP{<c>}{<q>} <registers> ; A1
+ // Alias of: LDM{<c>}{<q>} SP!, <registers> ; A1
+ if (cond.IsNotNever() &&
+ (!registers.Includes(sp) || AllowUnpredictable())) {
+ EmitA32(0x08bd0000U | (cond.GetCondition() << 28) |
+ GetRegisterListEncoding(registers, 0, 16));
+ return;
+ }
}
}
Delegate(kPop, &Assembler::pop, cond, size, registers);
@@ -8502,19 +8521,24 @@ void Assembler::pop(Condition cond, EncodingSize size, RegisterList registers) {
void Assembler::pop(Condition cond, EncodingSize size, Register rt) {
VIXL_ASSERT(AllowAssembler());
CheckIT(cond);
- if (IsUsingT32()) {
- // POP{<c>}{<q>} <single_register_list> ; T4
- if (!size.IsNarrow() && ((!rt.IsPC() || OutsideITBlockAndAlOrLast(cond)) ||
- AllowUnpredictable())) {
- EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12));
- AdvanceIT();
- return;
- }
- } else {
- // POP{<c>}{<q>} <single_register_list> ; A1
- if (cond.IsNotNever()) {
- EmitA32(0x049d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
- return;
+ if (!rt.IsSP() || AllowUnpredictable()) {
+ if (IsUsingT32()) {
+ // POP{<c>}{<q>} <single_register_list> ; T4
+ // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T4
+ if (!size.IsNarrow() && (!rt.IsPC() || OutsideITBlockAndAlOrLast(cond) ||
+ AllowUnpredictable())) {
+ EmitT32_32(0xf85d0b04U | (rt.GetCode() << 12));
+ AdvanceIT();
+ return;
+ }
+ } else {
+ // POP{<c>}{<q>} <single_register_list> ; A1
+ // Alias of: LDR{<c>}{<q>} <Rt>, [SP], #4 ; T1
+ if (cond.IsNotNever()) {
+ EmitA32(0x049d0004U | (cond.GetCondition() << 28) |
+ (rt.GetCode() << 12));
+ return;
+ }
}
}
Delegate(kPop, &Assembler::pop, cond, size, rt);
@@ -8525,28 +8549,37 @@ void Assembler::push(Condition cond,
RegisterList registers) {
VIXL_ASSERT(AllowAssembler());
CheckIT(cond);
- if (IsUsingT32()) {
- // PUSH{<c>}{<q>} <registers> ; T1
- if (!size.IsWide() && ((registers.GetList() & ~0x40ff) == 0)) {
- EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) |
- GetRegisterListEncoding(registers, 0, 8));
- AdvanceIT();
- return;
- }
- // PUSH{<c>}{<q>} <registers> ; T1
- if (!size.IsNarrow() && ((registers.GetList() & ~0x5fff) == 0)) {
- EmitT32_32(0xe92d0000U |
- (GetRegisterListEncoding(registers, 14, 1) << 14) |
- GetRegisterListEncoding(registers, 0, 13));
- AdvanceIT();
- return;
- }
- } else {
- // PUSH{<c>}{<q>} <registers> ; A1
- if (cond.IsNotNever()) {
- EmitA32(0x092d0000U | (cond.GetCondition() << 28) |
- GetRegisterListEncoding(registers, 0, 16));
- return;
+ if (!registers.IsEmpty() || AllowUnpredictable()) {
+ if (IsUsingT32()) {
+ // PUSH{<c>}{<q>} <registers> ; T1
+ if (!size.IsWide() && registers.IsR0toR7orLR()) {
+ EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) |
+ GetRegisterListEncoding(registers, 0, 8));
+ AdvanceIT();
+ return;
+ }
+ // PUSH{<c>}{<q>} <registers> ; T1
+ // Alias of: STMDB SP!, <registers> ; T1
+ if (!size.IsNarrow() && !registers.Includes(pc) &&
+ ((!registers.Includes(sp) && (registers.GetCount() > 1)) ||
+ AllowUnpredictable())) {
+ EmitT32_32(0xe92d0000U | GetRegisterListEncoding(registers, 0, 15));
+ AdvanceIT();
+ return;
+ }
+ } else {
+ // PUSH{<c>}{<q>} <registers> ; A1
+ // Alias of: STMDB SP!, <registers> ; A1
+ if (cond.IsNotNever() &&
+ // For A32, sp can appear in the list, but stores an UNKNOWN value if
+ // it is not the lowest-valued register.
+ (!registers.Includes(sp) ||
+ registers.GetFirstAvailableRegister().IsSP() ||
+ AllowUnpredictable())) {
+ EmitA32(0x092d0000U | (cond.GetCondition() << 28) |
+ GetRegisterListEncoding(registers, 0, 16));
+ return;
+ }
}
}
Delegate(kPush, &Assembler::push, cond, size, registers);
@@ -8557,14 +8590,17 @@ void Assembler::push(Condition cond, EncodingSize size, Register rt) {
CheckIT(cond);
if (IsUsingT32()) {
// PUSH{<c>}{<q>} <single_register_list> ; T4
- if (!size.IsNarrow() && (!rt.IsPC() || AllowUnpredictable())) {
+ // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! ; T4
+ if (!size.IsNarrow() &&
+ ((!rt.IsPC() && !rt.IsSP()) || AllowUnpredictable())) {
EmitT32_32(0xf84d0d04U | (rt.GetCode() << 12));
AdvanceIT();
return;
}
} else {
// PUSH{<c>}{<q>} <single_register_list> ; A1
- if (cond.IsNotNever() && (!rt.IsPC() || AllowUnpredictable())) {
+ // Alias of: STR{<c>}{<q>} <Rt>, [SP, #4]! ; A1
+ if (cond.IsNotNever() && (!rt.IsSP() || AllowUnpredictable())) {
EmitA32(0x052d0004U | (cond.GetCondition() << 28) | (rt.GetCode() << 12));
return;
}
@@ -11177,7 +11213,7 @@ void Assembler::stmdb(Condition cond,
if (IsUsingT32()) {
// STMDB{<c>}{<q>} SP!, <registers> ; T1
if (!size.IsWide() && rn.Is(sp) && write_back.DoesWriteBack() &&
- ((registers.GetList() & ~0x40ff) == 0)) {
+ registers.IsR0toR7orLR()) {
EmitT32_16(0xb400 | (GetRegisterListEncoding(registers, 14, 1) << 8) |
GetRegisterListEncoding(registers, 0, 8));
AdvanceIT();
@@ -19589,13 +19625,13 @@ void Assembler::vldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
- ((offset & 0x3) == 0));
- int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+ int32_t target = off >> 2;
uint32_t U = (target >= 0);
target = abs(target) | (U << 8);
return instr | (target & 0xff) | ((target & 0x100) << 15);
@@ -19619,13 +19655,13 @@ void Assembler::vldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
- ((offset & 0x3) == 0));
- int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+ int32_t target = off >> 2;
uint32_t U = (target >= 0);
target = abs(target) | (U << 8);
return instr | (target & 0xff) | ((target & 0x100) << 15);
@@ -19743,13 +19779,13 @@ void Assembler::vldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(T32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kT32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
- ((offset & 0x3) == 0));
- int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kT32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+ int32_t target = off >> 2;
uint32_t U = (target >= 0);
target = abs(target) | (U << 8);
return instr | (target & 0xff) | ((target & 0x100) << 15);
@@ -19773,13 +19809,13 @@ void Assembler::vldr(Condition cond,
public:
EmitOp() : Location::EmitOperator(A32) {}
virtual uint32_t Encode(uint32_t instr,
- Location::Offset pc,
- const Location* location) const VIXL_OVERRIDE {
- pc += kA32PcDelta;
- Location::Offset offset = location->GetLocation() - AlignDown(pc, 4);
- VIXL_ASSERT((offset >= -1020) && (offset <= 1020) &&
- ((offset & 0x3) == 0));
- int32_t target = offset >> 2;
+ Location::Offset program_counter,
+ const Location* loc) const VIXL_OVERRIDE {
+ program_counter += kA32PcDelta;
+ Location::Offset off =
+ loc->GetLocation() - AlignDown(program_counter, 4);
+ VIXL_ASSERT((off >= -1020) && (off <= 1020) && ((off & 0x3) == 0));
+ int32_t target = off >> 2;
uint32_t U = (target >= 0);
target = abs(target) | (U << 8);
return instr | (target & 0xff) | ((target & 0x100) << 15);
diff --git a/src/aarch32/instructions-aarch32.cc b/src/aarch32/instructions-aarch32.cc
index 2d1cb905..92450d41 100644
--- a/src/aarch32/instructions-aarch32.cc
+++ b/src/aarch32/instructions-aarch32.cc
@@ -95,10 +95,10 @@ QRegister VRegister::Q() const {
Register RegisterList::GetFirstAvailableRegister() const {
- for (uint32_t i = 0; i < kNumberOfRegisters; i++) {
- if (((list_ >> i) & 1) != 0) return Register(i);
+ if (list_ == 0) {
+ return Register();
}
- return Register();
+ return Register(CountTrailingZeros(list_));
}
diff --git a/src/aarch32/instructions-aarch32.h b/src/aarch32/instructions-aarch32.h
index f11f2b02..e2c95d19 100644
--- a/src/aarch32/instructions-aarch32.h
+++ b/src/aarch32/instructions-aarch32.h
@@ -38,7 +38,7 @@ extern "C" {
#include "utils-vixl.h"
#include "aarch32/constants-aarch32.h"
-#ifdef __arm__
+#if defined(__arm__) && !defined(__SOFTFP__)
#define HARDFLOAT __attribute__((noinline, pcs("aapcs-vfp")))
#else
#define HARDFLOAT __attribute__((noinline))
@@ -491,6 +491,8 @@ class RegisterList {
}
Register GetFirstAvailableRegister() const;
bool IsEmpty() const { return list_ == 0; }
+ bool IsSingleRegister() const { return IsPowerOf2(list_); }
+ int GetCount() const { return CountSetBits(list_); }
static RegisterList Union(const RegisterList& list_1,
const RegisterList& list_2) {
return RegisterList(list_1.list_ | list_2.list_);
diff --git a/src/aarch32/macro-assembler-aarch32.h b/src/aarch32/macro-assembler-aarch32.h
index 6d76642f..390b9088 100644
--- a/src/aarch32/macro-assembler-aarch32.h
+++ b/src/aarch32/macro-assembler-aarch32.h
@@ -402,13 +402,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
VIXL_ASSERT(GetBuffer()->Is32bitAligned());
}
// If we need to add padding, check if we have to emit the pool.
- const int32_t pc = GetCursorOffset();
- if (label->Needs16BitPadding(pc)) {
+ const int32_t cursor = GetCursorOffset();
+ if (label->Needs16BitPadding(cursor)) {
const int kPaddingBytes = 2;
- if (pool_manager_.MustEmit(pc, kPaddingBytes)) {
- int32_t new_pc = pool_manager_.Emit(this, pc, kPaddingBytes);
- USE(new_pc);
- VIXL_ASSERT(new_pc == GetCursorOffset());
+ if (pool_manager_.MustEmit(cursor, kPaddingBytes)) {
+ int32_t new_cursor = pool_manager_.Emit(this, cursor, kPaddingBytes);
+ USE(new_cursor);
+ VIXL_ASSERT(new_cursor == GetCursorOffset());
}
}
pool_manager_.Bind(this, label, GetCursorOffset());
@@ -430,30 +430,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
Location* location,
Condition* cond = NULL) {
int size = info->size;
- int32_t pc = GetCursorOffset();
+ int32_t cursor = GetCursorOffset();
// If we need to emit a branch over the instruction, take this into account.
if ((cond != NULL) && NeedBranch(cond)) {
size += kBranchSize;
- pc += kBranchSize;
+ cursor += kBranchSize;
}
- int32_t from = pc;
+ int32_t from = cursor;
from += IsUsingT32() ? kT32PcDelta : kA32PcDelta;
if (info->pc_needs_aligning) from = AlignDown(from, 4);
int32_t min = from + info->min_offset;
int32_t max = from + info->max_offset;
- ForwardReference<int32_t> temp_ref(pc,
+ ForwardReference<int32_t> temp_ref(cursor,
info->size,
min,
max,
info->alignment);
if (pool_manager_.MustEmit(GetCursorOffset(), size, &temp_ref, location)) {
- int32_t new_pc = pool_manager_.Emit(this,
- GetCursorOffset(),
- info->size,
- &temp_ref,
- location);
- USE(new_pc);
- VIXL_ASSERT(new_pc == GetCursorOffset());
+ int32_t new_cursor = pool_manager_.Emit(this,
+ GetCursorOffset(),
+ info->size,
+ &temp_ref,
+ location);
+ USE(new_cursor);
+ VIXL_ASSERT(new_cursor == GetCursorOffset());
}
}
@@ -464,13 +464,13 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
// into account, as well as potential 16-bit padding needed to reach the
// minimum accessible location.
int alignment = literal->GetMaxAlignment();
- int32_t pc = GetCursorOffset();
- int total_size = AlignUp(pc, alignment) - pc + literal->GetSize();
- if (literal->Needs16BitPadding(pc)) total_size += 2;
- if (pool_manager_.MustEmit(pc, total_size)) {
- int32_t new_pc = pool_manager_.Emit(this, pc, total_size);
- USE(new_pc);
- VIXL_ASSERT(new_pc == GetCursorOffset());
+ int32_t cursor = GetCursorOffset();
+ int total_size = AlignUp(cursor, alignment) - cursor + literal->GetSize();
+ if (literal->Needs16BitPadding(cursor)) total_size += 2;
+ if (pool_manager_.MustEmit(cursor, total_size)) {
+ int32_t new_cursor = pool_manager_.Emit(this, cursor, total_size);
+ USE(new_cursor);
+ VIXL_ASSERT(new_cursor == GetCursorOffset());
}
pool_manager_.Bind(this, literal, GetCursorOffset());
literal->EmitPoolObject(this);
@@ -2897,7 +2897,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
VIXL_ASSERT(OutsideITBlock());
MacroEmissionCheckScope guard(this);
ITScope it_scope(this, &cond, guard);
- pop(cond, registers);
+ if (registers.IsSingleRegister() &&
+ (!IsUsingT32() || !registers.IsR0toR7orPC())) {
+ pop(cond, registers.GetFirstAvailableRegister());
+ } else if (!registers.IsEmpty()) {
+ pop(cond, registers);
+ }
}
void Pop(RegisterList registers) { Pop(al, registers); }
@@ -2917,7 +2922,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
VIXL_ASSERT(OutsideITBlock());
MacroEmissionCheckScope guard(this);
ITScope it_scope(this, &cond, guard);
- push(cond, registers);
+ if (registers.IsSingleRegister() && !registers.Includes(sp) &&
+ (!IsUsingT32() || !registers.IsR0toR7orLR())) {
+ push(cond, registers.GetFirstAvailableRegister());
+ } else if (!registers.IsEmpty()) {
+ push(cond, registers);
+ }
}
void Push(RegisterList registers) { Push(al, registers); }
@@ -2927,7 +2937,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
VIXL_ASSERT(OutsideITBlock());
MacroEmissionCheckScope guard(this);
ITScope it_scope(this, &cond, guard);
- push(cond, rt);
+ if (IsUsingA32() && rt.IsSP()) {
+ // Only the A32 multiple-register form can push sp.
+ push(cond, RegisterList(rt));
+ } else {
+ push(cond, rt);
+ }
}
void Push(Register rt) { Push(al, rt); }
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 534e1d9b..895e8c50 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -1054,7 +1054,7 @@ void Assembler::cls(const Register& rd, const Register& rn) {
void Assembler::PRE##za(const Register& xd) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
VIXL_ASSERT(xd.Is64Bits()); \
- Emit(SF(xd) | OP##ZA | Rd(xd)); \
+ Emit(SF(xd) | OP##ZA | Rd(xd) | Rn(xzr)); \
} \
\
void Assembler::PRE##b(const Register& xd, const Register& xn) { \
@@ -1066,7 +1066,7 @@ void Assembler::cls(const Register& rd, const Register& rn) {
void Assembler::PRE##zb(const Register& xd) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
VIXL_ASSERT(xd.Is64Bits()); \
- Emit(SF(xd) | OP##ZB | Rd(xd)); \
+ Emit(SF(xd) | OP##ZB | Rd(xd) | Rn(xzr)); \
}
PAUTH_VARIATIONS(VIXL_DEFINE_ASM_FUNC)
@@ -1083,13 +1083,13 @@ void Assembler::pacga(const Register& xd,
void Assembler::xpaci(const Register& xd) {
VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
VIXL_ASSERT(xd.Is64Bits());
- Emit(SF(xd) | XPACI | Rd(xd));
+ Emit(SF(xd) | XPACI | Rd(xd) | Rn(xzr));
}
void Assembler::xpacd(const Register& xd) {
VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
VIXL_ASSERT(xd.Is64Bits());
- Emit(SF(xd) | XPACD | Rd(xd));
+ Emit(SF(xd) | XPACD | Rd(xd) | Rn(xzr));
}
@@ -1134,10 +1134,10 @@ void Assembler::LoadStorePair(const CPURegister& rt,
if (addr.IsImmediateOffset()) {
addrmodeop = LoadStorePairOffsetFixed;
} else {
- if (addr.IsPreIndex()) {
+ if (addr.IsImmediatePreIndex()) {
addrmodeop = LoadStorePairPreIndexFixed;
} else {
- VIXL_ASSERT(addr.IsPostIndex());
+ VIXL_ASSERT(addr.IsImmediatePostIndex());
addrmodeop = LoadStorePairPostIndexFixed;
}
}
@@ -3852,6 +3852,15 @@ void Assembler::udot(const VRegister& vd,
Emit(VFormat(vd) | NEON_UDOT | Rm(vm) | Rn(vn) | Rd(vd));
}
+void Assembler::usdot(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM));
+ VIXL_ASSERT(AreSameFormat(vn, vm));
+ VIXL_ASSERT((vd.Is2S() && vn.Is8B()) || (vd.Is4S() && vn.Is16B()));
+
+ Emit(VFormat(vd) | 0x0e809c00 | Rm(vm) | Rn(vn) | Rd(vd));
+}
void Assembler::faddp(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
@@ -4166,6 +4175,32 @@ void Assembler::udot(const VRegister& vd,
ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd));
}
+void Assembler::sudot(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM));
+ VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) ||
+ (vd.Is4S() && vn.Is16B() && vm.Is1S4B()));
+ int q = vd.Is4S() ? (1U << NEONQ_offset) : 0;
+ int index_num_bits = 2;
+ Emit(q | 0x0f00f000 | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
+ Rd(vd));
+}
+
+
+void Assembler::usdot(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM));
+ VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) ||
+ (vd.Is4S() && vn.Is16B() && vm.Is1S4B()));
+ int q = vd.Is4S() ? (1U << NEONQ_offset) : 0;
+ int index_num_bits = 2;
+ Emit(q | 0x0f80f000 | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
+ Rd(vd));
+}
// clang-format off
#define NEON_BYELEMENT_LIST(V) \
@@ -5224,6 +5259,32 @@ void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
}
+void Assembler::smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM));
+ VIXL_ASSERT(vd.IsLaneSizeS());
+ VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB());
+
+ Emit(0x4e80a400 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM));
+ VIXL_ASSERT(vd.IsLaneSizeS());
+ VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB());
+
+ Emit(0x4e80ac00 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM));
+ VIXL_ASSERT(vd.IsLaneSizeS());
+ VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB());
+
+ Emit(0x6e80a400 | Rd(vd) | Rn(vn) | Rm(vm));
+}
// Note:
// For all ToImm instructions below, a difference in case
@@ -5287,6 +5348,44 @@ Instr Assembler::ImmFP64(double imm) { return FP64ToImm8(imm) << ImmFP_offset; }
// Code generation helpers.
+bool Assembler::OneInstrMoveImmediateHelper(Assembler* assm,
+ const Register& dst,
+ uint64_t imm) {
+ bool emit_code = assm != NULL;
+ unsigned n, imm_s, imm_r;
+ int reg_size = dst.GetSizeInBits();
+
+ if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
+ // Immediate can be represented in a move zero instruction. Movz can't write
+ // to the stack pointer.
+ if (emit_code) {
+ assm->movz(dst, imm);
+ }
+ return true;
+ } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
+ // Immediate can be represented in a move negative instruction. Movn can't
+ // write to the stack pointer.
+ if (emit_code) {
+ assm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
+ }
+ return true;
+ } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
+ // Immediate can be represented in a logical orr instruction.
+ VIXL_ASSERT(!dst.IsZero());
+ if (emit_code) {
+ assm->LogicalImmediate(dst,
+ AppropriateZeroRegFor(dst),
+ n,
+ imm_s,
+ imm_r,
+ ORR);
+ }
+ return true;
+ }
+ return false;
+}
+
+
void Assembler::MoveWide(const Register& rd,
uint64_t imm,
int shift,
@@ -5694,11 +5793,11 @@ Instr Assembler::LoadStoreMemOperand(const MemOperand& addr,
ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0);
}
- if (addr.IsPreIndex() && IsImmLSUnscaled(offset)) {
+ if (addr.IsImmediatePreIndex() && IsImmLSUnscaled(offset)) {
return base | LoadStorePreIndexFixed | ImmLS(offset);
}
- if (addr.IsPostIndex() && IsImmLSUnscaled(offset)) {
+ if (addr.IsImmediatePostIndex() && IsImmLSUnscaled(offset)) {
return base | LoadStorePostIndexFixed | ImmLS(offset);
}
@@ -5720,10 +5819,10 @@ void Assembler::LoadStorePAC(const Register& xt,
const MemOperand& addr,
LoadStorePACOp op) {
VIXL_ASSERT(xt.Is64Bits());
- VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsPreIndex());
+ VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePreIndex());
Instr pac_op = op;
- if (addr.IsPreIndex()) {
+ if (addr.IsImmediatePreIndex()) {
pac_op |= LoadStorePACPreBit;
}
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index f7aafd07..65c55cc4 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -2105,6 +2105,13 @@ class Assembler : public vixl::internal::AssemblerBase {
MoveWide(rd, imm, shift, MOVZ);
}
+ // Move immediate, aliases for movz, movn, orr.
+ void mov(const Register& rd, uint64_t imm) {
+ if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
+ VIXL_UNIMPLEMENTED();
+ }
+ }
+
// Misc instructions.
// Monitor debug-mode breakpoint.
@@ -3360,6 +3367,21 @@ class Assembler : public vixl::internal::AssemblerBase {
// Unsigned dot product [Armv8.2].
void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+ // Dot Product with unsigned and signed integers (vector).
+ void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // Dot product with signed and unsigned integers (vector, by element).
+ void sudot(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
+ // Dot product with unsigned and signed integers (vector, by element).
+ void usdot(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int vm_index);
+
// Signed saturating rounding doubling multiply subtract returning high half
// [Armv8.1].
void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
@@ -3586,6 +3608,15 @@ class Assembler : public vixl::internal::AssemblerBase {
const VRegister& vm,
int rot);
+ // Signed 8-bit integer matrix multiply-accumulate (vector).
+ void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
+ void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // Unsigned 8-bit integer matrix multiply-accumulate (vector).
+ void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
// Scalable Vector Extensions.
// Absolute value (predicated).
@@ -4584,6 +4615,26 @@ class Assembler : public vixl::internal::AssemblerBase {
const PRegisterZ& pg,
const SVEMemOperand& addr);
+ // Contiguous load and replicate thirty-two bytes.
+ void ld1rob(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load and replicate sixteen halfwords.
+ void ld1roh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load and replicate eight words.
+ void ld1row(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Contiguous load and replicate four doublewords.
+ void ld1rod(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
// Load and broadcast signed byte to vector.
void ld1rsb(const ZRegister& zt,
const PRegisterZ& pg,
@@ -5266,6 +5317,12 @@ class Assembler : public vixl::internal::AssemblerBase {
const ZRegister& zn,
const ZRegister& zm);
+ // Splice two vectors under predicate control (constructive).
+ void splice_con(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
// Signed saturating add vectors (unpredicated).
void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
@@ -5820,6 +5877,1030 @@ class Assembler : public vixl::internal::AssemblerBase {
// Interleave elements from two half vectors.
void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+ // Add with carry long (bottom).
+ void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Add with carry long (top).
+ void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Add narrow high part (bottom).
+ void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Add narrow high part (top).
+ void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Add pairwise.
+ void addp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Bitwise clear and exclusive OR.
+ void bcax(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+
+ // Scatter lower bits into positions selected by bitmask.
+ void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Gather lower bits from positions selected by bitmask.
+ void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Group bits to right or left as selected by bitmask.
+ void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Bitwise select.
+ void bsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+
+ // Bitwise select with first input inverted.
+ void bsl1n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+
+ // Bitwise select with second input inverted.
+ void bsl2n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+
+ // Complex integer add with rotate.
+ void cadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+
+ // Complex integer dot product (indexed).
+ void cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
+
+ // Complex integer dot product.
+ void cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+
+ // Complex integer multiply-add with rotate (indexed).
+ void cmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
+
+ // Complex integer multiply-add with rotate.
+ void cmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+
+ // Bitwise exclusive OR of three vectors.
+ void eor3(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+
+ // Interleaving exclusive OR (bottom, top).
+ void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Interleaving exclusive OR (top, bottom).
+ void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Floating-point add pairwise.
+ void faddp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point up convert long (top, predicated).
+ void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point down convert and narrow (top, predicated).
+ void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point down convert, rounding to odd (predicated).
+ void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point down convert, rounding to odd (top, predicated).
+ void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point base 2 logarithm as integer.
+ void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Floating-point maximum number pairwise.
+ void fmaxnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point maximum pairwise.
+ void fmaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point minimum number pairwise.
+ void fminnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Floating-point minimum pairwise.
+ void fminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Half-precision floating-point multiply-add long to single-precision
+ // (bottom).
+ void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Half-precision floating-point multiply-add long to single-precision
+ // (top).
+ void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Half-precision floating-point multiply-subtract long from
+ // single-precision (bottom).
+ void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Half-precision floating-point multiply-subtract long from
+ // single-precision (top, indexed).
+ void fmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Half-precision floating-point multiply-add long to single-precision
+ // (bottom, indexed).
+ void fmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Half-precision floating-point multiply-add long to single-precision
+ // (top, indexed).
+ void fmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Half-precision floating-point multiply-subtract long from
+ // single-precision (bottom, indexed).
+ void fmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Half-precision floating-point multiply-subtract long from
+ // single-precision (top).
+ void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Count matching elements in vector.
+ void histcnt(const ZRegister& zd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Count matching elements in vector segments.
+ void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Gather load non-temporal signed bytes.
+ void ldnt1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Gather load non-temporal signed halfwords.
+ void ldnt1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Gather load non-temporal signed words.
+ void ldnt1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+
+ // Detect any matching elements, setting the condition flags.
+ void match(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Multiply-add to accumulator (indexed).
+ void mla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Multiply-subtract from accumulator (indexed).
+ void mls(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Multiply (indexed).
+ void mul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Multiply vectors (unpredicated).
+ void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Bitwise inverted select.
+ void nbsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+
+ // Detect no matching elements, setting the condition flags.
+ void nmatch(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Polynomial multiply vectors (unpredicated).
+ void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Polynomial multiply long (bottom).
+ void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Polynomial multiply long (top).
+ void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Rounding add narrow high part (bottom).
+ void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Rounding add narrow high part (top).
+ void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Rounding shift right narrow by immediate (bottom).
+ void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Rounding shift right narrow by immediate (top).
+ void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Rounding subtract narrow high part (bottom).
+ void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Rounding subtract narrow high part (top).
+ void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed absolute difference and accumulate.
+ void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed absolute difference and accumulate long (bottom).
+ void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed absolute difference and accumulate long (top).
+ void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed absolute difference long (bottom).
+ void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed absolute difference long (top).
+ void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed add and accumulate long pairwise.
+ void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
+
+ // Signed add long (bottom).
+ void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed add long (bottom + top).
+ void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed add long (top).
+ void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed add wide (bottom).
+ void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed add wide (top).
+ void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Subtract with carry long (bottom).
+ void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Subtract with carry long (top).
+ void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed halving addition.
+ void shadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Shift right narrow by immediate (bottom).
+ void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Shift right narrow by immediate (top).
+ void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed halving subtract.
+ void shsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed halving subtract reversed vectors.
+ void shsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Shift left and insert (immediate).
+ void sli(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed maximum pairwise.
+ void smaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed minimum pairwise.
+ void sminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed multiply-add long to accumulator (bottom, indexed).
+ void smlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed multiply-add long to accumulator (bottom).
+ void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed multiply-add long to accumulator (top, indexed).
+ void smlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed multiply-add long to accumulator (top).
+ void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed multiply-subtract long from accumulator (bottom, indexed).
+ void smlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed multiply-subtract long from accumulator (bottom).
+ void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed multiply-subtract long from accumulator (top, indexed).
+ void smlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed multiply-subtract long from accumulator (top).
+ void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed multiply returning high half (unpredicated).
+ void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed multiply long (bottom, indexed).
+ void smullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed multiply long (bottom).
+ void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed multiply long (top, indexed).
+ void smullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed multiply long (top).
+ void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating absolute value.
+ void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Signed saturating addition (predicated).
+ void sqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Saturating complex integer add with rotate.
+ void sqcadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+
+ // Signed saturating doubling multiply-add long to accumulator (bottom,
+ // indexed).
+ void sqdmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating doubling multiply-add long to accumulator (bottom).
+ void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating doubling multiply-add long to accumulator (bottom x
+ // top).
+ void sqdmlalbt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating doubling multiply-add long to accumulator (top,
+ // indexed).
+ void sqdmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating doubling multiply-add long to accumulator (top).
+ void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating doubling multiply-subtract long from accumulator
+ // (bottom, indexed).
+ void sqdmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating doubling multiply-subtract long from accumulator
+ // (bottom).
+ void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating doubling multiply-subtract long from accumulator
+ // (bottom x top).
+ void sqdmlslbt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating doubling multiply-subtract long from accumulator
+ // (top, indexed).
+ void sqdmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating doubling multiply-subtract long from accumulator
+ // (top).
+ void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating doubling multiply high (indexed).
+ void sqdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating doubling multiply high (unpredicated).
+ void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating doubling multiply long (bottom, indexed).
+ void sqdmullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating doubling multiply long (bottom).
+ void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating doubling multiply long (top, indexed).
+ void sqdmullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating doubling multiply long (top).
+ void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating negate.
+ void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Saturating rounding doubling complex integer multiply-add high with
+ // rotate (indexed).
+ void sqrdcmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
+
+ // Saturating rounding doubling complex integer multiply-add high with
+ // rotate.
+ void sqrdcmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+
+ // Signed saturating rounding doubling multiply-add high to accumulator
+ // (indexed).
+ void sqrdmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating rounding doubling multiply-add high to accumulator
+ // (unpredicated).
+ void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating rounding doubling multiply-subtract high from
+ // accumulator (indexed).
+ void sqrdmlsh(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating rounding doubling multiply-subtract high from
+ // accumulator (unpredicated).
+ void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating rounding doubling multiply high (indexed).
+ void sqrdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed saturating rounding doubling multiply high (unpredicated).
+ void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating rounding shift left by vector (predicated).
+ void sqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating rounding shift left reversed vectors (predicated).
+ void sqrshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating rounding shift right narrow by immediate (bottom).
+ void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating rounding shift right narrow by immediate (top).
+ void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating rounding shift right unsigned narrow by immediate
+ // (bottom).
+ void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating rounding shift right unsigned narrow by immediate
+ // (top).
+ void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating shift left by immediate.
+ void sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Signed saturating shift left by vector (predicated).
+ void sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating shift left reversed vectors (predicated).
+ void sqshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating shift left unsigned by immediate.
+ void sqshlu(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Signed saturating shift right narrow by immediate (bottom).
+ void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating shift right narrow by immediate (top).
+ void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating shift right unsigned narrow by immediate (bottom).
+ void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating shift right unsigned narrow by immediate (top).
+ void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed saturating subtraction (predicated).
+ void sqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating subtraction reversed vectors (predicated).
+ void sqsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed saturating extract narrow (bottom).
+ void sqxtnb(const ZRegister& zd, const ZRegister& zn);
+
+ // Signed saturating extract narrow (top).
+ void sqxtnt(const ZRegister& zd, const ZRegister& zn);
+
+ // Signed saturating unsigned extract narrow (bottom).
+ void sqxtunb(const ZRegister& zd, const ZRegister& zn);
+
+ // Signed saturating unsigned extract narrow (top).
+ void sqxtunt(const ZRegister& zd, const ZRegister& zn);
+
+ // Signed rounding halving addition.
+ void srhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Shift right and insert (immediate).
+ void sri(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed rounding shift left by vector (predicated).
+ void srshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed rounding shift left reversed vectors (predicated).
+ void srshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Signed rounding shift right by immediate.
+ void srshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Signed rounding shift right and accumulate (immediate).
+ void srsra(const ZRegister& zda, const ZRegister& zn, int shift);
+
+ // Signed shift left long by immediate (bottom).
+ void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed shift left long by immediate (top).
+ void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Signed shift right and accumulate (immediate).
+ void ssra(const ZRegister& zda, const ZRegister& zn, int shift);
+
+ // Signed subtract long (bottom).
+ void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed subtract long (bottom - top).
+ void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed subtract long (top).
+ void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed subtract long (top - bottom).
+ void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed subtract wide (bottom).
+ void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed subtract wide (top).
+ void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Subtract narrow high part (bottom).
+ void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Subtract narrow high part (top).
+ void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed saturating addition of unsigned value.
+ void suqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Programmable table lookup in one or two vector table (zeroing).
+ void tbl(const ZRegister& zd,
+ const ZRegister& zn1,
+ const ZRegister& zn2,
+ const ZRegister& zm);
+
+ // Programmable table lookup in single vector table (merging).
+ void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned absolute difference and accumulate.
+ void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned absolute difference and accumulate long (bottom).
+ void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned absolute difference and accumulate long (top).
+ void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned absolute difference long (bottom).
+ void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned absolute difference long (top).
+ void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned add and accumulate long pairwise.
+ void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
+
+ // Unsigned add long (bottom).
+ void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned add long (top).
+ void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned add wide (bottom).
+ void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned add wide (top).
+ void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned halving addition.
+ void uhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned halving subtract.
+ void uhsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned halving subtract reversed vectors.
+ void uhsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned maximum pairwise.
+ void umaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned minimum pairwise.
+ void uminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned multiply-add long to accumulator (bottom, indexed).
+ void umlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Unsigned multiply-add long to accumulator (bottom).
+ void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned multiply-add long to accumulator (top, indexed).
+ void umlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Unsigned multiply-add long to accumulator (top).
+ void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned multiply-subtract long from accumulator (bottom, indexed).
+ void umlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Unsigned multiply-subtract long from accumulator (bottom).
+ void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned multiply-subtract long from accumulator (top, indexed).
+ void umlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Unsigned multiply-subtract long from accumulator (top).
+ void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned multiply returning high half (unpredicated).
+ void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned multiply long (bottom, indexed).
+ void umullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Unsigned multiply long (bottom).
+ void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned multiply long (top, indexed).
+ void umullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Unsigned multiply long (top).
+ void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned saturating addition (predicated).
+ void uqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating rounding shift left by vector (predicated).
+ void uqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating rounding shift left reversed vectors (predicated).
+ void uqrshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating rounding shift right narrow by immediate (bottom).
+ void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Unsigned saturating rounding shift right narrow by immediate (top).
+ void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Unsigned saturating shift left by immediate.
+ void uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Unsigned saturating shift left by vector (predicated).
+ void uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating shift left reversed vectors (predicated).
+ void uqshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating shift right narrow by immediate (bottom).
+ void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Unsigned saturating shift right narrow by immediate (top).
+ void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Unsigned saturating subtraction (predicated).
+ void uqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating subtraction reversed vectors (predicated).
+ void uqsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned saturating extract narrow (bottom).
+ void uqxtnb(const ZRegister& zd, const ZRegister& zn);
+
+ // Unsigned saturating extract narrow (top).
+ void uqxtnt(const ZRegister& zd, const ZRegister& zn);
+
+ // Unsigned reciprocal estimate (predicated).
+ void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Unsigned rounding halving addition.
+ void urhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned rounding shift left by vector (predicated).
+ void urshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned rounding shift left reversed vectors (predicated).
+ void urshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned rounding shift right by immediate.
+ void urshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift);
+
+ // Unsigned reciprocal square root estimate (predicated).
+ void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
+
+ // Unsigned rounding shift right and accumulate (immediate).
+ void ursra(const ZRegister& zda, const ZRegister& zn, int shift);
+
+ // Unsigned shift left long by immediate (bottom).
+ void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Unsigned shift left long by immediate (top).
+ void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);
+
+ // Unsigned saturating addition of signed value.
+ void usqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ // Unsigned shift right and accumulate (immediate).
+ void usra(const ZRegister& zda, const ZRegister& zn, int shift);
+
+ // Unsigned subtract long (bottom).
+ void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned subtract long (top).
+ void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned subtract wide (bottom).
+ void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned subtract wide (top).
+ void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
+
+ // While decrementing signed scalar greater than or equal to scalar.
+ void whilege(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While decrementing signed scalar greater than scalar.
+ void whilegt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While decrementing unsigned scalar higher than scalar.
+ void whilehi(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While decrementing unsigned scalar higher or same as scalar.
+ void whilehs(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While free of read-after-write conflicts.
+ void whilerw(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // While free of write-after-read/write conflicts.
+ void whilewr(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm);
+
+ // Bitwise exclusive OR and rotate right by immediate.
+ void xar(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int shift);
+
+ // Floating-point matrix multiply-accumulate.
+ void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Signed integer matrix multiply-accumulate.
+ void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned by signed integer matrix multiply-accumulate.
+ void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned integer matrix multiply-accumulate.
+ void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned by signed integer dot product.
+ void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
+
+ // Unsigned by signed integer indexed dot product.
+ void usdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
+ // Signed by unsigned integer indexed dot product.
+ void sudot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
// Emit generic instructions.
// Emit raw instructions into the instruction stream.
@@ -5850,6 +6931,9 @@ class Assembler : public vixl::internal::AssemblerBase {
}
// Code generation helpers.
+ static bool OneInstrMoveImmediateHelper(Assembler* assm,
+ const Register& dst,
+ uint64_t imm);
// Register encoding.
template <int hibit, int lobit>
@@ -5983,11 +7067,11 @@ class Assembler : public vixl::internal::AssemblerBase {
static Instr ImmTestBranchBit(unsigned bit_pos) {
VIXL_ASSERT(IsUint6(bit_pos));
// Subtract five from the shift offset, as we need bit 5 from bit_pos.
- unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
- unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
- b5 &= ImmTestBranchBit5_mask;
- b40 &= ImmTestBranchBit40_mask;
- return b5 | b40;
+ unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
+ unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
+ bit5 &= ImmTestBranchBit5_mask;
+ bit40 &= ImmTestBranchBit40_mask;
+ return bit5 | bit40;
}
// Data Processing encoding.
@@ -6660,6 +7744,16 @@ class Assembler : public vixl::internal::AssemblerBase {
Instr immoffset_op,
int imm_divisor = 1);
+ void SVELd1VecScaHelper(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ uint32_t msize,
+ bool is_signed);
+ void SVESt1VecScaHelper(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ uint32_t msize);
+
void Prefetch(PrefetchOperation op,
const MemOperand& addr,
LoadStoreScalingOption option = PreferScaledOffset);
@@ -6724,27 +7818,30 @@ class Assembler : public vixl::internal::AssemblerBase {
int pattern,
int multiplier);
- Instr EncodeSVEShiftImmediate(Shift shift_op,
- int shift,
- int lane_size_in_bits);
+ Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);
+
+ Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);
void SVEBitwiseShiftImmediate(const ZRegister& zd,
const ZRegister& zn,
Instr encoded_imm,
- SVEBitwiseShiftUnpredicatedOp op);
+ Instr op);
void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
const PRegisterM& pg,
Instr encoded_imm,
- SVEBitwiseShiftByImm_PredicatedOp op);
+ Instr op);
+
+ Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
+ const ZRegister& zm,
+ int index,
+ Instr op_h,
+ Instr op_s,
+ Instr op_d);
- Instr SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
- const ZRegister& zm,
- int index,
- Instr op_h,
- Instr op_s,
- Instr op_d);
+ Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);
+ Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);
void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
const PRegister& pg,
diff --git a/src/aarch64/assembler-sve-aarch64.cc b/src/aarch64/assembler-sve-aarch64.cc
index f7cf8b21..84d4d517 100644
--- a/src/aarch64/assembler-sve-aarch64.cc
+++ b/src/aarch64/assembler-sve-aarch64.cc
@@ -165,11 +165,10 @@ void Assembler::orr(const ZRegister& zd,
// SVEBitwiseShiftPredicated.
-void Assembler::SVEBitwiseShiftImmediatePred(
- const ZRegister& zdn,
- const PRegisterM& pg,
- Instr encoded_imm_and_tsz,
- SVEBitwiseShiftByImm_PredicatedOp op) {
+void Assembler::SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
+ const PRegisterM& pg,
+ Instr encoded_imm_and_tsz,
+ Instr op) {
Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz)
<< 5;
Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22;
@@ -189,7 +188,7 @@ void Assembler::asr(const ZRegister& zd,
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
VIXL_ASSERT(zd.Is(zn));
Instr encoded_imm =
- EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits());
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASR_z_p_zi);
}
@@ -229,7 +228,7 @@ void Assembler::asrd(const ZRegister& zd,
VIXL_ASSERT(zd.Is(zn));
Instr encoded_imm =
- EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits());
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASRD_z_p_zi);
}
@@ -264,7 +263,7 @@ void Assembler::lsl(const ZRegister& zd,
VIXL_ASSERT(zd.Is(zn));
Instr encoded_imm =
- EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits());
+ EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits());
SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSL_z_p_zi);
}
@@ -321,7 +320,7 @@ void Assembler::lsr(const ZRegister& zd,
VIXL_ASSERT(zd.Is(zn));
Instr encoded_imm =
- EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits());
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSR_z_p_zi);
}
@@ -366,15 +365,13 @@ void Assembler::lsrr(const ZRegister& zd,
// SVEBitwiseShiftUnpredicated.
-Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op,
- int shift,
- int lane_size_in_bits) {
- if (shift_op == LSL) {
- VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
- return lane_size_in_bits + shift;
- }
+Instr Assembler::EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits) {
+ VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits));
+ return lane_size_in_bits + shift;
+}
- VIXL_ASSERT((shift_op == ASR) || (shift_op == LSR));
+Instr Assembler::EncodeSVEShiftRightImmediate(int shift,
+ int lane_size_in_bits) {
VIXL_ASSERT((shift > 0) && (shift <= lane_size_in_bits));
return (2 * lane_size_in_bits) - shift;
}
@@ -382,7 +379,7 @@ Instr Assembler::EncodeSVEShiftImmediate(Shift shift_op,
void Assembler::SVEBitwiseShiftImmediate(const ZRegister& zd,
const ZRegister& zn,
Instr encoded_imm_and_tsz,
- SVEBitwiseShiftUnpredicatedOp op) {
+ Instr op) {
Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz)
<< 16;
Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22;
@@ -393,7 +390,7 @@ void Assembler::asr(const ZRegister& zd, const ZRegister& zn, int shift) {
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
VIXL_ASSERT(AreSameLaneSize(zd, zn));
Instr encoded_imm =
- EncodeSVEShiftImmediate(ASR, shift, zd.GetLaneSizeInBits());
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
SVEBitwiseShiftImmediate(zd, zn, encoded_imm, ASR_z_zi);
}
@@ -410,7 +407,7 @@ void Assembler::asr(const ZRegister& zd,
void Assembler::lsl(const ZRegister& zd, const ZRegister& zn, int shift) {
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
Instr encoded_imm =
- EncodeSVEShiftImmediate(LSL, shift, zd.GetLaneSizeInBits());
+ EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits());
SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSL_z_zi);
}
@@ -427,7 +424,7 @@ void Assembler::lsl(const ZRegister& zd,
void Assembler::lsr(const ZRegister& zd, const ZRegister& zn, int shift) {
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
Instr encoded_imm =
- EncodeSVEShiftImmediate(LSR, shift, zd.GetLaneSizeInBits());
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSR_z_zi);
}
@@ -1318,26 +1315,10 @@ void Assembler::fcmla(const ZRegister& zda,
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
- VIXL_ASSERT(index >= 0);
-
- int lane_size = zda.GetLaneSizeInBytes();
-
- Instr zm_and_idx = 0;
- Instr op = FCMLA_z_zzzi_h;
- if (lane_size == kHRegSizeInBytes) {
- // Zm<18:16> | i2<20:19>
- VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3));
- zm_and_idx = (index << 19) | Rx<18, 16>(zm);
- } else {
- // Zm<19:16> | i1<20>
- VIXL_ASSERT(lane_size == kSRegSizeInBytes);
- VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1));
- zm_and_idx = (index << 20) | Rx<19, 16>(zm);
- op = FCMLA_z_zzzi_s;
- }
Instr rotate_bit = (rot / 90) << 10;
- Emit(op | zm_and_idx | rotate_bit | Rd(zda) | Rn(zn));
+ Emit(FCMLA_z_zzzi_h | SVEMulComplexIndexHelper(zm, index) | rotate_bit |
+ Rd(zda) | Rn(zn));
}
// SVEFPFastReduction.
@@ -1539,12 +1520,12 @@ void Assembler::fnmsb(const ZRegister& zdn,
Emit(FNMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za));
}
-Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
- const ZRegister& zm,
- int index,
- Instr op_h,
- Instr op_s,
- Instr op_d) {
+Instr Assembler::SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
+ const ZRegister& zm,
+ int index,
+ Instr op_h,
+ Instr op_s,
+ Instr op_d) {
Instr size = lane_size_in_bytes_log2 << SVESize_offset;
Instr zm_with_index = Rm(zm);
Instr op = 0xffffffff;
@@ -1563,15 +1544,15 @@ Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
op = op_h;
break;
case kSRegSizeInBytesLog2:
- VIXL_ASSERT(zm.GetCode() <= 7);
- VIXL_ASSERT(IsUint2(index));
+ VIXL_CHECK(zm.GetCode() <= 7);
+ VIXL_CHECK(IsUint2(index));
// Top two bits of "zm" encode the index.
zm_with_index |= (index & 3) << (Rm_offset + 3);
op = op_s;
break;
case kDRegSizeInBytesLog2:
- VIXL_ASSERT(zm.GetCode() <= 15);
- VIXL_ASSERT(IsUint1(index));
+ VIXL_CHECK(zm.GetCode() <= 15);
+ VIXL_CHECK(IsUint1(index));
// Top bit of "zm" encodes the index.
zm_with_index |= (index & 1) << (Rm_offset + 4);
op = op_d;
@@ -1582,6 +1563,45 @@ Instr Assembler::SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
return op | zm_with_index | size;
}
+Instr Assembler::SVEMulLongIndexHelper(const ZRegister& zm, int index) {
+ Instr imm_field;
+ Instr zm_id;
+ if (zm.IsLaneSizeH()) {
+ VIXL_CHECK(zm.GetCode() <= 7);
+ VIXL_CHECK(IsUint3(index));
+ imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19;
+ zm_id = Rx<18, 16>(zm);
+ } else {
+ VIXL_ASSERT(zm.IsLaneSizeS());
+ VIXL_CHECK(zm.GetCode() <= 15);
+ VIXL_CHECK(IsUint2(index));
+ imm_field = ExtractBit(index, 1) << 20;
+ zm_id = Rx<19, 16>(zm);
+ }
+
+ // Synthesize the low part of immediate encoding.
+ imm_field |= ExtractBit(index, 0) << 11;
+
+ return zm_id | imm_field;
+}
+
+Instr Assembler::SVEMulComplexIndexHelper(const ZRegister& zm, int index) {
+ Instr zm_idx_size;
+ if (zm.IsLaneSizeH()) {
+ // Zm<18:16> | i2<20:19>
+ VIXL_CHECK(zm.GetCode() <= 7);
+ VIXL_CHECK(IsUint2(index));
+ zm_idx_size = (index << 19) | Rx<18, 16>(zm) | 0;
+ } else {
+ VIXL_ASSERT(zm.IsLaneSizeS());
+ // Zm<19:16> | i1<20>
+ VIXL_CHECK(zm.GetCode() <= 15);
+ VIXL_CHECK(IsUint1(index));
+ zm_idx_size = (index << 20) | Rx<19, 16>(zm) | (1 << 22);
+ }
+ return zm_idx_size;
+}
+
// SVEFPMulAddIndex.
void Assembler::fmla(const ZRegister& zda,
@@ -1593,12 +1613,12 @@ void Assembler::fmla(const ZRegister& zda,
// The encoding of opcode, index, Zm, and size are synthesized in this
// variable.
- Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
- zm,
- index,
- FMLA_z_zzzi_h,
- FMLA_z_zzzi_s,
- FMLA_z_zzzi_d);
+ Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ FMLA_z_zzzi_h,
+ FMLA_z_zzzi_s,
+ FMLA_z_zzzi_d);
Emit(synthesized_op | Rd(zda) | Rn(zn));
}
@@ -1612,12 +1632,12 @@ void Assembler::fmls(const ZRegister& zda,
// The encoding of opcode, index, Zm, and size are synthesized in this
// variable.
- Instr synthesized_op = SVEFPMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
- zm,
- index,
- FMLS_z_zzzi_h,
- FMLS_z_zzzi_s,
- FMLS_z_zzzi_d);
+ Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ FMLS_z_zzzi_h,
+ FMLS_z_zzzi_s,
+ FMLS_z_zzzi_d);
Emit(synthesized_op | Rd(zda) | Rn(zn));
}
@@ -1638,12 +1658,12 @@ void Assembler::fmul(const ZRegister& zd,
// The encoding of opcode, index, Zm, and size are synthesized in this
// variable.
- Instr synthesized_op = SVEFPMulIndexHelper(zd.GetLaneSizeInBytesLog2(),
- zm,
- index,
- FMUL_z_zzi_h,
- FMUL_z_zzi_s,
- FMUL_z_zzi_d);
+ Instr synthesized_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ FMUL_z_zzi_h,
+ FMUL_z_zzi_s,
+ FMUL_z_zzi_d);
Emit(synthesized_op | Rd(zd) | Rn(zn));
}
@@ -4743,57 +4763,67 @@ void Assembler::SVELd1St1ScaImmHelper(const ZRegister& zt,
Emit(op | Rt(zt) | PgLow8(pg) | RnSP(addr.GetScalarBase()));
}
-void Assembler::ld1rqb(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(0));
- VIXL_ASSERT(zt.IsLaneSizeB());
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LD1RQB_z_p_br_contiguous,
- LD1RQB_z_p_bi_u8,
- 16);
-}
+void Assembler::SVELd1VecScaHelper(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ uint32_t msize_bytes_log2,
+ bool is_signed) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(addr.IsVectorPlusScalar());
+ ZRegister zn = addr.GetVectorBase();
+ VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD());
+ VIXL_ASSERT(AreSameLaneSize(zn, zt));
-void Assembler::ld1rqd(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(3));
- VIXL_ASSERT(zt.IsLaneSizeD());
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LD1RQD_z_p_br_contiguous,
- LD1RQD_z_p_bi_u64,
- 16);
-}
+ uint32_t esize = zn.GetLaneSizeInBytesLog2();
+ uint32_t b14_13 = 0;
+ if (!is_signed) b14_13 = zn.IsLaneSizeS() ? 0x1 : 0x2;
-void Assembler::ld1rqh(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(1));
- VIXL_ASSERT(zt.IsLaneSizeH());
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LD1RQH_z_p_br_contiguous,
- LD1RQH_z_p_bi_u16,
- 16);
+ Instr op = 0x04008000; // LDNT1 with vector plus scalar addressing mode.
+ op |= (esize << 30) | (msize_bytes_log2 << 23) | (b14_13 << 13);
+ Emit(op | Rt(zt) | PgLow8(pg) |
+ SVEMemOperandHelper(msize_bytes_log2, 1, addr, true));
}
-void Assembler::ld1rqw(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(2));
- VIXL_ASSERT(zt.IsLaneSizeS());
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LD1RQW_z_p_br_contiguous,
- LD1RQW_z_p_bi_u32,
- 16);
-}
+void Assembler::SVESt1VecScaHelper(const ZRegister& zt,
+ const PRegister& pg,
+ const SVEMemOperand& addr,
+ uint32_t msize_bytes_log2) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(addr.IsVectorPlusScalar());
+ ZRegister zn = addr.GetVectorBase();
+ VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD());
+ VIXL_ASSERT(AreSameLaneSize(zn, zt));
+
+ uint32_t bit22 = zn.IsLaneSizeS() ? (1 << 22) : 0;
+ Instr op = 0xe4002000; // STNT1 with vector plus scalar addressing mode.
+ op |= bit22 | (msize_bytes_log2 << 23);
+ Emit(op | Rt(zt) | PgLow8(pg) |
+ SVEMemOperandHelper(msize_bytes_log2, 1, addr, true));
+}
+
+#define VIXL_SVE_LD1R_LIST(V) \
+ V(qb, 0, B, LD1RQB_z_p_br_contiguous, LD1RQB_z_p_bi_u8, 16) \
+ V(qh, 1, H, LD1RQH_z_p_br_contiguous, LD1RQH_z_p_bi_u16, 16) \
+ V(qw, 2, S, LD1RQW_z_p_br_contiguous, LD1RQW_z_p_bi_u32, 16) \
+ V(qd, 3, D, LD1RQD_z_p_br_contiguous, LD1RQD_z_p_bi_u64, 16) \
+ V(ob, 0, B, 0xa4200000, 0xa4202000, 32) \
+ V(oh, 1, H, 0xa4a00000, 0xa4a02000, 32) \
+ V(ow, 2, S, 0xa5200000, 0xa5202000, 32) \
+ V(od, 3, D, 0xa5a00000, 0xa5a02000, 32)
+
+#define VIXL_DEFINE_ASM_FUNC(FN, SH, SZ, SCA, IMM, BYTES) \
+ void Assembler::ld1r##FN(const ZRegister& zt, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ VIXL_ASSERT((BYTES == 16) || \
+ ((BYTES == 32) && (CPUHas(CPUFeatures::kSVEF64MM)))); \
+ VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(SH)); \
+ VIXL_ASSERT(zt.IsLaneSize##SZ()); \
+ SVELd1St1ScaImmHelper(zt, pg, addr, SCA, IMM, BYTES); \
+ }
+VIXL_SVE_LD1R_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
+#undef VIXL_SVE_LD1R_LIST
#define VIXL_DEFINE_LDFF1(MSZ, LANE_SIZE) \
void Assembler::ldff1##MSZ(const ZRegister& zt, \
@@ -4930,12 +4960,17 @@ void Assembler::ldnt1b(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LDNT1B_z_p_br_contiguous,
- LDNT1B_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVELd1VecScaHelper(zt, pg, addr, 0, /* is_signed = */ false);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1B_z_p_br_contiguous,
+ LDNT1B_z_p_bi_contiguous);
+ }
}
void Assembler::ldnt1d(const ZRegister& zt,
@@ -4943,12 +4978,17 @@ void Assembler::ldnt1d(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LDNT1D_z_p_br_contiguous,
- LDNT1D_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVELd1VecScaHelper(zt, pg, addr, 3, /* is_signed = */ false);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1D_z_p_br_contiguous,
+ LDNT1D_z_p_bi_contiguous);
+ }
}
void Assembler::ldnt1h(const ZRegister& zt,
@@ -4956,12 +4996,17 @@ void Assembler::ldnt1h(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LDNT1H_z_p_br_contiguous,
- LDNT1H_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVELd1VecScaHelper(zt, pg, addr, 1, /* is_signed = */ false);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1H_z_p_br_contiguous,
+ LDNT1H_z_p_bi_contiguous);
+ }
}
void Assembler::ldnt1w(const ZRegister& zt,
@@ -4969,12 +5014,38 @@ void Assembler::ldnt1w(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- LDNT1W_z_p_br_contiguous,
- LDNT1W_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVELd1VecScaHelper(zt, pg, addr, 2, /* is_signed = */ false);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ LDNT1W_z_p_br_contiguous,
+ LDNT1W_z_p_bi_contiguous);
+ }
+}
+
+void Assembler::ldnt1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2));
+ SVELd1VecScaHelper(zt, pg, addr, 0, /* is_signed = */ true);
+}
+
+void Assembler::ldnt1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2));
+ SVELd1VecScaHelper(zt, pg, addr, 1, /* is_signed = */ true);
+}
+
+void Assembler::ldnt1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2));
+ SVELd1VecScaHelper(zt, pg, addr, 2, /* is_signed = */ true);
}
Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2,
@@ -5002,7 +5073,13 @@ Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2,
VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD());
VIXL_ASSERT(IsMultiple(imm, (1 << msize_in_bytes_log2)));
op = Rn(zn) | ImmUnsignedField<20, 16>(imm >> msize_in_bytes_log2);
-
+ } else if (addr.IsVectorPlusScalar()) {
+ VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER);
+ VIXL_ASSERT(addr.GetShiftAmount() == 0);
+ ZRegister zn = addr.GetVectorBase();
+ VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD());
+ Register xm = addr.GetScalarOffset();
+ op = Rn(zn) | Rm(xm);
} else if (addr.IsScalarPlusVector()) {
// We have to support several different addressing modes. Some instructions
// support a subset of these, but the SVEMemOperand encoding is consistent.
@@ -5156,12 +5233,17 @@ void Assembler::stnt1b(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- STNT1B_z_p_br_contiguous,
- STNT1B_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVESt1VecScaHelper(zt, pg, addr, 0);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1B_z_p_br_contiguous,
+ STNT1B_z_p_bi_contiguous);
+ }
}
void Assembler::stnt1d(const ZRegister& zt,
@@ -5169,12 +5251,17 @@ void Assembler::stnt1d(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- STNT1D_z_p_br_contiguous,
- STNT1D_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVESt1VecScaHelper(zt, pg, addr, 3);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1D_z_p_br_contiguous,
+ STNT1D_z_p_bi_contiguous);
+ }
}
void Assembler::stnt1h(const ZRegister& zt,
@@ -5182,12 +5269,17 @@ void Assembler::stnt1h(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- STNT1H_z_p_br_contiguous,
- STNT1H_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVESt1VecScaHelper(zt, pg, addr, 1);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1H_z_p_br_contiguous,
+ STNT1H_z_p_bi_contiguous);
+ }
}
void Assembler::stnt1w(const ZRegister& zt,
@@ -5195,12 +5287,17 @@ void Assembler::stnt1w(const ZRegister& zt,
const SVEMemOperand& addr) {
VIXL_ASSERT(addr.IsPlainScalar() ||
(addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
- (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)));
- SVELd1St1ScaImmHelper(zt,
- pg,
- addr,
- STNT1W_z_p_br_contiguous,
- STNT1W_z_p_bi_contiguous);
+ (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)) ||
+ (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+ if (addr.IsVectorPlusScalar()) {
+ SVESt1VecScaHelper(zt, pg, addr, 2);
+ } else {
+ SVELd1St1ScaImmHelper(zt,
+ pg,
+ addr,
+ STNT1W_z_p_br_contiguous,
+ STNT1W_z_p_bi_contiguous);
+ }
}
void Assembler::str(const CPURegister& rt, const SVEMemOperand& addr) {
@@ -5471,14 +5568,27 @@ void Assembler::ext(const ZRegister& zd,
// 0000 0101 001. .... 000. .... .... ....
// imm8h<20:16> | imm8l<12:10> | Zm<9:5> | Zdn<4:0>
- USE(zn);
+ // EXT <Zd>.B, { <Zn1>.B, <Zn2>.B }, #<imm>
+ // 0000 0101 011. .... 000. .... .... ....
+ // imm8h<20:16> | imm8l<12:10> | Zn<9:5> | Zd<4:0>
+
VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
- VIXL_ASSERT(zd.Is(zn));
VIXL_ASSERT(IsUint8(offset));
int imm8h = ExtractUnsignedBitfield32(7, 3, offset);
int imm8l = ExtractUnsignedBitfield32(2, 0, offset);
- Emit(EXT_z_zi_des | Rd(zd) | Rn(zm) | ImmUnsignedField<20, 16>(imm8h) |
+
+ Instr op;
+ if (zd.Is(zn)) {
+ // Destructive form.
+ op = EXT_z_zi_des | Rn(zm);
+ } else {
+ // Constructive form (requires SVE2).
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm));
+ op = 0x05600000 | Rn(zn);
+ }
+
+ Emit(op | Rd(zd) | ImmUnsignedField<20, 16>(imm8h) |
ImmUnsignedField<12, 10>(imm8l));
}
@@ -5814,16 +5924,37 @@ void Assembler::splice(const ZRegister& zd,
const PRegister& pg,
const ZRegister& zn,
const ZRegister& zm) {
- // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
- // 0000 0101 ..10 1100 100. .... .... ....
- // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
-
- USE(zn);
- VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
- VIXL_ASSERT(zd.Is(zn));
VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
- Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+ if (zd.Aliases(zn)) {
+ // SPLICE <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T>
+ // 0000 0101 ..10 1100 100. .... .... ....
+ // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(zd.Is(zn));
+
+ Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+ } else {
+ splice_con(zd, pg, zn, zm);
+ }
+}
+
+void Assembler::splice_con(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn1,
+ const ZRegister& zn2) {
+ // SPLICE <Zd>.<T>, <Pg>, { <Zn1>.<T>, <Zn2>.<T> }
+ // 0000 0101 ..10 1101 100. .... .... ....
+ // size<23:22> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ USE(zn2);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreConsecutive(zn1, zn2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn1, zn2));
+
+ Emit(0x052d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn1));
}
// SVEPermuteVectorUnpredicated.
@@ -6485,5 +6616,3284 @@ void Assembler::nots(const PRegisterWithLaneSize& pd,
eors(pd, pg, pn, pg.VnB());
}
+// SVE2
+
+void Assembler::adclb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ADCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 0.0. .... 1101 00.. .... ....
+ // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+
+ Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0;
+ Emit(0x4500d000 | sz | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::adclt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ADCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 0.0. .... 1101 01.. .... ....
+ // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+
+ Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0;
+ Emit(0x4500d400 | sz | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::addhnb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ADDHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0110 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45206000 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::addhnt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ADDHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0110 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45206400 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::addp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // ADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0001 101. .... .... ....
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4411a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::bcax(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk) {
+ // BCAX <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+ // 0000 0100 011. .... 0011 10.. .... ....
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+
+ Emit(0x04603800 | Rd(zd) | Rm(zm) | Rn(zk));
+}
+
+void Assembler::bdep(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // BDEP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..0. .... 1011 01.. .... ....
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4500b400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::bext(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // BEXT <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..0. .... 1011 00.. .... ....
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4500b000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::bgrp(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // BGRP <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..0. .... 1011 10.. .... ....
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4500b800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::bsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk) {
+ // BSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+ // 0000 0100 001. .... 0011 11.. .... ....
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+
+ Emit(0x04203c00 | Rd(zd) | Rm(zm) | Rn(zk));
+}
+
+void Assembler::bsl1n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk) {
+ // BSL1N <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+ // 0000 0100 011. .... 0011 11.. .... ....
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+
+ Emit(0x04603c00 | Rd(zd) | Rm(zm) | Rn(zk));
+}
+
+void Assembler::bsl2n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk) {
+ // BSL2N <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+ // 0000 0100 101. .... 0011 11.. .... ....
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+
+ Emit(0x04a03c00 | Rd(zd) | Rm(zm) | Rn(zk));
+}
+
+void Assembler::cadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ // CADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+ // 0100 0101 ..00 0000 1101 1... .... ....
+ // size<23:22> | op<16> | rot<10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT((rot == 90) || (rot == 270));
+
+ Instr rotate_bit = (rot == 90) ? 0 : (1 << 10);
+ Emit(0x4500d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm));
+}
+
+void Assembler::cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot) {
+ // CDOT <Zda>.D, <Zn>.H, <Zm>.H[<imm>], <const>
+ // 0100 0100 111. .... 0100 .... .... ....
+ // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
+ VIXL_ASSERT(index >= 0);
+
+ Instr zm_and_idx = 0;
+ if (zm.IsLaneSizeB()) {
+ // Zm<18:16> | i2<20:19>
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3));
+ zm_and_idx = (index << 19) | Rx<18, 16>(zm);
+ } else {
+ // Zm<19:16> | i1<20>
+ VIXL_ASSERT(zm.IsLaneSizeH());
+ VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1));
+ zm_and_idx = (index << 20) | Rx<19, 16>(zm);
+ }
+
+ Instr rotate_bits = (rot / 90) << 10;
+ Emit(0x44a04000 | zm_and_idx | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn));
+}
+
+void Assembler::cdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ // CDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>, <const>
+ // 0100 0100 ..0. .... 0001 .... .... ....
+ // size<23:22> | Zm<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4));
+
+ Instr rotate_bits = (rot / 90) << 10;
+ Emit(0x44001000 | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::cmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot) {
+ // CMLA <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
+ // 0100 0100 101. .... 0110 .... .... ....
+ // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+
+ Instr rotate_bit = (rot / 90) << 10;
+ Emit(0x44a06000 | SVEMulComplexIndexHelper(zm, index) | rotate_bit | Rd(zda) |
+ Rn(zn));
+}
+
+void Assembler::cmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ // CMLA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+ // 0100 0100 ..0. .... 0010 .... .... ....
+ // size<23:22> | Zm<20:16> | op<12> | rot<11:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+
+ Instr rotate_bit = (rot / 90) << 10;
+ Emit(0x44002000 | rotate_bit | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::eor3(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk) {
+ // EOR3 <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+ // 0000 0100 001. .... 0011 10.. .... ....
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+
+ Emit(0x04203800 | Rd(zd) | Rm(zm) | Rn(zk));
+}
+
+void Assembler::eorbt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // EORBT <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..0. .... 1001 00.. .... ....
+ // size<23:22> | Zm<20:16> | tb<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x45009000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::eortb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // EORTB <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..0. .... 1001 01.. .... ....
+ // size<23:22> | Zm<20:16> | tb<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x45009400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::faddp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0100 ..01 0000 100. .... .... ....
+ // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x64108000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fcvtlt(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FCVTLT <Zd>.S, <Pg>/M, <Zn>.H
+ // 0110 0100 1000 1001 101. .... .... ....
+ // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Instr op;
+ if (zd.IsLaneSizeD() && zn.IsLaneSizeS()) {
+ op = 0x64cba000;
+ } else {
+ VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeH());
+ op = 0x6489a000;
+ }
+
+ Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcvtnt(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FCVTNT <Zd>.S, <Pg>/M, <Zn>.D
+ // 0110 0100 1100 1010 101. .... .... ....
+ // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Instr op;
+ if (zd.IsLaneSizeS() && zn.IsLaneSizeD()) {
+ op = 0x64caa000;
+ } else {
+ VIXL_ASSERT(zd.IsLaneSizeH() && zn.IsLaneSizeS());
+ op = 0x6488a000;
+ }
+ Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcvtx(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FCVTX <Zd>.S, <Pg>/M, <Zn>.D
+ // 0110 0101 0000 1010 101. .... .... ....
+ // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeD());
+
+ Emit(0x650aa000 | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::fcvtxnt(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FCVTXNT <Zd>.S, <Pg>/M, <Zn>.D
+ // 0110 0100 0000 1010 101. .... .... ....
+ // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Emit(0x640aa000 | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::flogb(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // FLOGB <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0110 0101 0001 1..0 101. .... .... ....
+ // opc<23:22> | opc2<18:17> | U<16> | Pg<12:10> | Zn<9:5> | Zd<4:0> | size<>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+
+ // Size field is encoded in bits <18:17> rather than <23:22>.
+ Instr size = SVESize(zd) >> 5;
+ Emit(0x6518a000 | size | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+// SVE2 predicated floating-point pairwise max/min (FMAXNMP/FMAXP/FMINNMP/FMINP).
+// Destructive Zdn form: zd must alias zn (zn is only checked, not encoded); the
+// second operand zm is placed in the Rn field of the encoding.
+void Assembler::fmaxnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMAXNMP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0100 ..01 0100 100. .... .... ....
+ // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x64148000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fmaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0100 ..01 0110 100. .... .... ....
+ // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x64168000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fminnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMINNMP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0100 ..01 0101 100. .... .... ....
+ // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x64158000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::fminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0110 0100 ..01 0111 100. .... .... ....
+ // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x64178000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+// SVE2 half-to-single widening FP multiply-accumulate (FMLALB/FMLALT) and
+// multiply-subtract (FMLSLB/FMLSLT), vector and by-element forms. The indexed
+// forms restrict zm to z0-z7 and the lane index to 0-7; the index is split as
+// i3h -> bits <20:19> and i3l -> bit <11>.
+void Assembler::fmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMLALB <Zda>.S, <Zn>.H, <Zm>.H
+ // 0110 0100 101. .... 1000 00.. .... ....
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+
+ Emit(0x64a08000 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // FMLALB <Zda>.S, <Zn>.H, <Zm>.H[<imm>]
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+
+ Emit(0x64a04000 | Rd(zda) | Rn(zn) | zm_and_idx);
+}
+
+void Assembler::fmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMLALT <Zda>.S, <Zn>.H, <Zm>.H
+ // 0110 0100 101. .... 1000 01.. .... ....
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+
+ Emit(0x64a08400 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // FMLALT <Zda>.S, <Zn>.H, <Zm>.H
+ // 0110 0100 101. .... 1000 01.. .... ....
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+
+ Emit(0x64a04400 | Rd(zda) | Rn(zn) | zm_and_idx);
+}
+
+void Assembler::fmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMLSLB <Zda>.S, <Zn>.H, <Zm>.H
+ // 0110 0100 101. .... 1010 00.. .... ....
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+
+ Emit(0x64a0a000 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // FMLSLB <Zda>.S, <Zn>.H, <Zm>.H
+ // 0110 0100 101. .... 1010 00.. .... ....
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+
+ Emit(0x64a06000 | Rd(zda) | Rn(zn) | zm_and_idx);
+}
+
+void Assembler::fmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // FMLSLT <Zda>.S, <Zn>.H, <Zm>.H
+ // 0110 0100 101. .... 1010 01.. .... ....
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+
+ Emit(0x64a0a400 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::fmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // FMLSLT <Zda>.S, <Zn>.H, <Zm>.H
+ // 0110 0100 101. .... 1010 01.. .... ....
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH());
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+
+ Emit(0x64a06400 | Rd(zda) | Rn(zn) | zm_and_idx);
+}
+
+// SVE2 histogram instructions (HISTCNT for S/D lanes, HISTSEG for B lanes) and
+// the predicated character-match MATCH (B/H lanes only; size taken from zm).
+void Assembler::histcnt(const ZRegister& zd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // HISTCNT <Zd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..1. .... 110. .... .... ....
+ // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD());
+
+ Emit(0x4520c000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::histseg(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // HISTSEG <Zd>.B, <Zn>.B, <Zm>.B
+ // 0100 0101 ..1. .... 1010 00.. .... ....
+ // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT(zd.IsLaneSizeB());
+
+ Emit(0x4520a000 | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::match(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // MATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..1. .... 100. .... ...0 ....
+ // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | op<4> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn, zm));
+ VIXL_ASSERT(zm.IsLaneSizeB() || zm.IsLaneSizeH());
+
+ Emit(0x45208000 | SVESize(zm) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+// SVE2 indexed integer multiply(-accumulate): MLA/MLS/MUL by selected lane.
+// SVEMulIndexHelper chooses between the three base opcodes (H/S/D lane widths)
+// and packs zm together with the lane index. The unpredicated vector MUL follows.
+void Assembler::mla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // MLA <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
+ // 0100 0100 111. .... 0000 10.. .... ....
+ // size<23:22> | opc<20:16> | S<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Instr synthesised_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ 0x44200800,
+ 0x44a00800,
+ 0x44e00800);
+
+ Emit(synthesised_op | Rd(zda) | Rn(zn));
+}
+
+void Assembler::mls(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // MLS <Zda>.D, <Zn>.D, <Zm>.D[<imm>]
+ // 0100 0100 111. .... 0000 11.. .... ....
+ // size<23:22> | opc<20:16> | S<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Instr synthesised_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ 0x44200c00,
+ 0x44a00c00,
+ 0x44e00c00);
+
+ Emit(synthesised_op | Rd(zda) | Rn(zn));
+}
+
+void Assembler::mul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
+ // 0100 0100 111. .... 1111 10.. .... ....
+ // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ 0x4420f800,
+ 0x44a0f800,
+ 0x44e0f800);
+
+ Emit(synthesised_op | Rd(zd) | Rn(zn));
+}
+
+void Assembler::mul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // MUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0110 00.. .... ....
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x04206000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// NBSL (inverted bitwise select, destructive: zd must alias zn; zk goes in the
+// Rn field), NMATCH (negated match, B/H lanes) and PMUL (polynomial multiply,
+// B lanes; no size field is encoded).
+void Assembler::nbsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk) {
+ // NBSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D
+ // 0000 0100 111. .... 0011 11.. .... ....
+ // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk));
+ VIXL_ASSERT(zd.IsLaneSizeD());
+
+ Emit(0x04e03c00 | Rd(zd) | Rm(zm) | Rn(zk));
+}
+
+void Assembler::nmatch(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // NMATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..1. .... 100. .... ...1 ....
+ // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | op<4> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(pd, zn, zm));
+ VIXL_ASSERT(zm.IsLaneSizeB() || zm.IsLaneSizeH());
+
+ Emit(0x45208010 | SVESize(zm) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::pmul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // PMUL <Zd>.B, <Zn>.B, <Zm>.B
+ // 0000 0100 001. .... 0110 01.. .... ....
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Emit(0x04206400 | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// Polynomial widening multiplies (PMULLB/PMULLT; size encoded from the wide zd)
+// and rounding add-halving-narrows (RADDHNB/RADDHNT; size encoded from the wide
+// source zn, since the destination lanes are half-width).
+void Assembler::pmullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // PMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0110 10.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+ // SVEPmull128 is not supported
+ VIXL_ASSERT(!zd.IsLaneSizeQ());
+
+ Emit(0x45006800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::pmullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // PMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0110 11.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+ // SVEPmull128 is not supported
+ VIXL_ASSERT(!zd.IsLaneSizeQ());
+
+ Emit(0x45006c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::raddhnb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // RADDHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0110 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45206800 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::raddhnt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // RADDHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0110 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45206c00 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// Table-generated SVE2 narrowing shift-right-by-immediate instructions (plain,
+// rounding, saturating, signed/unsigned variants). All share one body: the
+// destination is half the source lane width, and the shift amount is validated
+// and packed by EncodeSVEShiftRightImmediate.
+#define VIXL_SVE_SHR_LIST(V) \
+ V(rshrnb, 0x45201800) \
+ V(rshrnt, 0x45201c00) \
+ V(shrnb, 0x45201000) \
+ V(shrnt, 0x45201400) \
+ V(sqrshrnb, 0x45202800) \
+ V(sqrshrnt, 0x45202c00) \
+ V(sqrshrunb, 0x45200800) \
+ V(sqrshrunt, 0x45200c00) \
+ V(sqshrnb, 0x45202000) \
+ V(sqshrnt, 0x45202400) \
+ V(sqshrunb, 0x45200000) \
+ V(sqshrunt, 0x45200400) \
+ V(uqrshrnb, 0x45203800) \
+ V(uqrshrnt, 0x45203c00) \
+ V(uqshrnb, 0x45203000) \
+ V(uqshrnt, 0x45203400)
+
+#define VIXL_DEFINE_ASM_FUNC(MNE, X) \
+ void Assembler::MNE(const ZRegister& zd, const ZRegister& zn, int shift) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \
+ VIXL_ASSERT(!zd.IsLaneSizeD() && !zd.IsLaneSizeQ()); \
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); \
+ Instr encoded_imm = \
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); \
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, X); \
+ }
+VIXL_SVE_SHR_LIST(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
+
+// Rounding subtract-halving-narrow (RSUBHNB/RSUBHNT); as with RADDHN*, the size
+// field is taken from the wide source zn.
+void Assembler::rsubhnb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // RSUBHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0111 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45207800 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::rsubhnt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // RSUBHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0111 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45207c00 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// Signed absolute-difference family: SABA (same-width accumulate), widening
+// SABALB/SABALT and SABDLB/SABDLT (destination lanes twice the source width),
+// and SADALP (predicated pairwise add-accumulate into wider lanes).
+void Assembler::saba(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..0. .... 1111 10.. .... ....
+ // size<23:22> | Zm<20:16> | U<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Emit(0x4500f800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sabalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SABALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 1100 00.. .... ....
+ // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x4500c000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sabalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SABALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 1100 01.. .... ....
+ // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x4500c400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sabdlb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SABDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0011 00.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45003000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sabdlt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SABDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0011 01.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45003400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sadalp(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // SADALP <Zda>.<T>, <Pg>/M, <Zn>.<Tb>
+ // 0100 0100 ..00 0100 101. .... .... ....
+ // size<23:22> | U<16> | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x4404a000 | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn));
+}
+
+// Signed widening adds: SADDLB/SADDLBT/SADDLT take two narrow sources into a
+// double-width destination; SADDWB/SADDWT add a narrow zm to an already-wide zn.
+void Assembler::saddlb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SADDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0000 00.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45000000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::saddlbt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SADDLBT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 1000 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS());
+
+ Emit(0x45008000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::saddlt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SADDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0000 01.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45000400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::saddwb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SADDWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0100 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45004000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::saddwt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SADDWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0100 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45004400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// SBCLB/SBCLT (subtract with carry long): only S and D lane sizes are valid, so
+// the size field degenerates to a single bit at <22> (set for D lanes).
+void Assembler::sbclb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SBCLB <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 1.0. .... 1101 00.. .... ....
+ // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+
+ Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0;
+ Emit(0x4580d000 | sz | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sbclt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SBCLT <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 1.0. .... 1101 01.. .... ....
+ // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+
+ Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0;
+ Emit(0x4580d400 | sz | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+// Predicated halving arithmetic (SHADD/SHSUB/SHSUBR, destructive: zd must alias
+// zn, zm goes in the Rn field) and SLI (shift left and insert by immediate).
+void Assembler::shadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0000 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44108000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::shsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SHSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0010 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44128000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::shsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SHSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0110 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44168000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sli(const ZRegister& zd, const ZRegister& zn, int shift) {
+ // SLI <Zd>.<T>, <Zn>.<T>, #<const>
+ // 0100 0101 ..0. .... 1111 01.. .... ....
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | op<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits());
+
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500f400);
+}
+
+// Predicated signed pairwise max/min (SMAXP/SMINP), destructive Zdn form.
+void Assembler::smaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0100 101. .... .... ....
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4414a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0110 101. .... .... ....
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4416a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+// Table-generated SVE2 indexed widening multiply / multiply-accumulate
+// instructions (SMULL*/UMULL*/SMLAL*/UMLAL*/SMLSL*/UMLSL*/SQDMULL*). Each entry
+// expands to a by-element form whose destination lanes (S or D) are twice the
+// source lane width; SVEMulLongIndexHelper validates and packs zm with the index.
+#define VIXL_SVE_MULL_INDEX_LIST(V) \
+ V(smullb, 0x44a0c000) \
+ V(smullt, 0x44a0c400) \
+ V(umullb, 0x44a0d000) \
+ V(umullt, 0x44a0d400) \
+ V(smlalb, 0x44a08000) \
+ V(smlalt, 0x44a08400) \
+ V(smlslb, 0x44a0a000) \
+ V(smlslt, 0x44a0a400) \
+ V(umlalb, 0x44a09000) \
+ V(umlalt, 0x44a09400) \
+ V(umlslb, 0x44a0b000) \
+ V(umlslt, 0x44a0b400) \
+ V(sqdmullb, 0x44a0e000) \
+ V(sqdmullt, 0x44a0e400)
+
+#define VIXL_DEFINE_ASM_FUNC(MNE, OP) \
+ void Assembler::MNE(const ZRegister& zda, \
+ const ZRegister& zn, \
+ const ZRegister& zm, \
+ int index) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \
+ VIXL_ASSERT(AreSameLaneSize(zn, zm)); \
+ VIXL_ASSERT(zda.IsLaneSizeD() || zda.IsLaneSizeS()); \
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); \
+ Instr zm_with_index = SVEMulLongIndexHelper(zm, index); \
+ Emit(OP | SVESize(zda) | Rd(zda) | Rn(zn) | zm_with_index); \
+ }
+VIXL_SVE_MULL_INDEX_LIST(VIXL_DEFINE_ASM_FUNC)
+// Fixed: the #undef previously misspelled the macro name ("VIXL_DEFINE_ASM_FuNC"),
+// leaving VIXL_DEFINE_ASM_FUNC defined and clashing with the next #define of the
+// same name used by the other instruction tables in this file (cf. the pattern
+// around VIXL_SVE_SHR_LIST above).
+#undef VIXL_DEFINE_ASM_FUNC
+
+// Signed widening multiply-accumulate vector forms (SMLALB/SMLALT/SMLSLB/SMLSLT;
+// destination lanes twice the source width) and SMULH (same-width, returns the
+// high half of the product).
+void Assembler::smlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0100 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44004000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::smlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0100 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44004400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::smlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0101 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44005000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::smlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0101 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44005400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::smulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0110 10.. .... ....
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x04206800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// Signed widening multiplies (SMULLB/SMULLT), vector form.
+void Assembler::smullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0111 00.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+
+ Emit(0x45007000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::smullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0111 01.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+
+ Emit(0x45007400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+// Saturating operations: SQABS (predicated unary), SQADD (predicated,
+// destructive Zdn form) and SQCADD (complex integer add with rotation; only 90
+// and 270 degrees are valid, selected by a single bit at <10>).
+void Assembler::sqabs(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // SQABS <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0100 0100 ..00 1000 101. .... .... ....
+ // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(0x4408a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::sqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1000 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44188000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sqcadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ // SQCADD <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, <const>
+ // 0100 0101 ..00 0001 1101 1... .... ....
+ // size<23:22> | op<16> | rot<10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+ VIXL_ASSERT((rot == 90) || (rot == 270));
+
+ Instr rotate_bit = (rot == 90) ? 0 : (1 << 10);
+ Emit(0x4501d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm));
+}
+
+// SQDMLALB (indexed and vector forms) and SQDMLALBT: signed saturating doubling
+// multiply-add long. The indexed form packs zm and the index differently for
+// H-lane sources (zm in z0-z7, 3-bit index) and S-lane sources (zm in z0-z15,
+// 2-bit index).
+// This prototype maps to 2 instruction encodings:
+// sqdmlalb_z_zzzi_d
+// sqdmlalb_z_zzzi_s
+void Assembler::sqdmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // SQDMLALB <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
+ // 0100 0100 111. .... 0010 .0.. .... ....
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(index >= 0);
+
+ Instr zm_and_idx = 0;
+ if (zm.IsLaneSizeH()) {
+ // Zm<18:16> | i3h<20:19> | i3l<11>
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+ } else {
+ // Zm<19:16> | i2h<20> | i2l<11>
+ VIXL_ASSERT(zm.IsLaneSizeS());
+ VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3));
+ zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) |
+ Rx<19, 16>(zm);
+ }
+
+ Emit(0x44202000 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn));
+}
+
+void Assembler::sqdmlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0110 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x44006000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqdmlalbt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMLALBT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0000 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x44000800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// sqdmlalt_z_zzzi_d
+// sqdmlalt_z_zzzi_s
+void Assembler::sqdmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // SQDMLALT <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
+ // 0100 0100 111. .... 0010 .1.. .... ....
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // The usable Zm register number and index range shrink with wider lanes;
+ // both limits are asserted in the branches below.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(index >= 0);
+
+ Instr zm_and_idx = 0;
+ if (zm.IsLaneSizeH()) {
+ // Zm<18:16> | i3h<20:19> | i3l<11>
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+ } else {
+ // Zm<19:16> | i2h<20> | i2l<11>
+ VIXL_ASSERT(zm.IsLaneSizeS());
+ VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3));
+ zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) |
+ Rx<19, 16>(zm);
+ }
+
+ Emit(0x44202400 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn));
+}
+
+void Assembler::sqdmlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0110 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // Widening accumulate: zda lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x44006400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// sqdmlslb_z_zzzi_d
+// sqdmlslb_z_zzzi_s
+void Assembler::sqdmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // SQDMLSLB <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
+ // 0100 0100 111. .... 0011 .0.. .... ....
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // The usable Zm register number and index range shrink with wider lanes;
+ // both limits are asserted in the branches below.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(index >= 0);
+
+ Instr zm_and_idx = 0;
+ if (zm.IsLaneSizeH()) {
+ // Zm<18:16> | i3h<20:19> | i3l<11>
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+ } else {
+ // Zm<19:16> | i2h<20> | i2l<11>
+ VIXL_ASSERT(zm.IsLaneSizeS());
+ VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3));
+ zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) |
+ Rx<19, 16>(zm);
+ }
+
+ Emit(0x44203000 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn));
+}
+
+void Assembler::sqdmlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0110 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // Widening accumulate: zda lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x44006800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqdmlslbt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMLSLBT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0000 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0>
+ // Widening accumulate: zda lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x44000c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// sqdmlslt_z_zzzi_d
+// sqdmlslt_z_zzzi_s
+void Assembler::sqdmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // SQDMLSLT <Zda>.D, <Zn>.S, <Zm>.S[<imm>]
+ // 0100 0100 111. .... 0011 .1.. .... ....
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // The usable Zm register number and index range shrink with wider lanes;
+ // both limits are asserted in the branches below.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD());
+ VIXL_ASSERT(index >= 0);
+
+ Instr zm_and_idx = 0;
+ if (zm.IsLaneSizeH()) {
+ // Zm<18:16> | i3h<20:19> | i3l<11>
+ VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7));
+ zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) |
+ (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm);
+ } else {
+ // Zm<19:16> | i2h<20> | i2l<11>
+ VIXL_ASSERT(zm.IsLaneSizeS());
+ VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3));
+ zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) |
+ Rx<19, 16>(zm);
+ }
+
+ Emit(0x44203400 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn));
+}
+
+void Assembler::sqdmlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0110 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // Widening accumulate: zda lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x44006c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // SQDMULH <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
+ // 0100 0100 111. .... 1111 00.. .... ....
+ // size<23:22> | opc<20:16> | R<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ // SVEMulIndexHelper folds Zm, the index and the size field into one of the
+ // three per-lane-size opcodes below (H, S, D).
+ Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ 0x4420f000,
+ 0x44a0f000,
+ 0x44e0f000);
+
+ Emit(synthesised_op | Rd(zd) | Rn(zn));
+}
+
+void Assembler::sqdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0111 00.. .... ....
+ // size<23:22> | Zm<20:16> | R<10> | Zn<9:5> | Zd<4:0>
+ // All three operands share one lane size (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x04207000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqdmullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0110 00.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+
+ Emit(0x45006000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqdmullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQDMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0110 01.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+
+ Emit(0x45006400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqneg(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // SQNEG <Zd>.<T>, <Pg>/M, <Zn>.<T>
+ // 0100 0100 ..00 1001 101. .... .... ....
+ // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+ // Predicated (merging) unary op; zd and zn share one lane size (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+
+ Emit(0x4409a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::sqrdcmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot) {
+ // SQRDCMLAH <Zda>.H, <Zn>.H, <Zm>.H[<imm>], <const>
+ // 0100 0100 101. .... 0111 .... .... ....
+ // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+
+ // The rotation is given in degrees and encoded as rot / 90 in rot<11:10>.
+ Instr rotate_bit = (rot / 90) << 10;
+ Emit(0x44a07000 | SVEMulComplexIndexHelper(zm, index) | rotate_bit | Rd(zda) |
+ Rn(zn));
+}
+
+void Assembler::sqrdcmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ // SQRDCMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>, <const>
+ // 0100 0100 ..0. .... 0011 .... .... ....
+ // size<23:22> | Zm<20:16> | op<12> | rot<11:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+
+ // The rotation is given in degrees and encoded as rot / 90 in rot<11:10>.
+ Instr rotate_bit = (rot / 90) << 10;
+ Emit(0x44003000 | rotate_bit | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+// This prototype maps to 3 instruction encodings:
+// sqrdmlah_z_zzzi_d
+// sqrdmlah_z_zzzi_h
+// sqrdmlah_z_zzzi_s
+void Assembler::sqrdmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ // Per-lane-size opcodes: S and D forms differ from H only in size<23:22>.
+ Instr op_h = 0x44201000;
+ Instr op_s = op_h | (1 << 23);
+ Instr op_d = op_h | (3 << 22);
+ // The encoding of opcode, index, Zm, and size are synthesized in this
+ // variable.
+ Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ op_h,
+ op_s,
+ op_d);
+
+ Emit(synthesized_op | Rd(zda) | Rn(zn));
+}
+
+void Assembler::sqrdmlah(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQRDMLAH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0100 ..0. .... 0111 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0>
+ // All three operands share one lane size (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Emit(0x44007000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+// This prototype maps to 3 instruction encodings:
+// sqrdmlsh_z_zzzi_d
+// sqrdmlsh_z_zzzi_h
+// sqrdmlsh_z_zzzi_s
+void Assembler::sqrdmlsh(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ // Per-lane-size opcodes: S and D forms differ from H only in size<23:22>.
+ Instr op_h = 0x44201400;
+ Instr op_s = op_h | (1 << 23);
+ Instr op_d = op_h | (3 << 22);
+ // The encoding of opcode, index, Zm, and size are synthesized in this
+ // variable.
+ Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ op_h,
+ op_s,
+ op_d);
+
+ Emit(synthesized_op | Rd(zda) | Rn(zn));
+}
+
+void Assembler::sqrdmlsh(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQRDMLSH <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0100 ..0. .... 0111 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0>
+ // All three operands share one lane size (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Emit(0x44007400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqrdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ // SQRDMULH <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
+ // 0100 0100 111. .... 1111 01.. .... ....
+ // size<23:22> | opc<20:16> | R<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ // SVEMulIndexHelper folds Zm, the index and the size field into one of the
+ // three per-lane-size opcodes below (H, S, D).
+ Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(),
+ zm,
+ index,
+ 0x4420f400,
+ 0x44a0f400,
+ 0x44e0f400);
+
+ Emit(synthesised_op | Rd(zd) | Rn(zn));
+}
+
+void Assembler::sqrdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQRDMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0111 01.. .... ....
+ // size<23:22> | Zm<20:16> | R<10> | Zn<9:5> | Zd<4:0>
+ // All three operands share one lane size (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x04207400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::sqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1010 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted), so only zm is encoded,
+ // in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x440a8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sqrshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1110 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+ // Destructive (reversed-operand) form: zd must alias zn (asserted), so
+ // only zm is encoded, in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x440e8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // SQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 0110 100. .... .... ....
+ // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> |
+ // imm3<7:5> | Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted); the left-shift amount is
+ // range-checked and encoded against the lane width.
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x04068000);
+}
+
+void Assembler::sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1000 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted), so only zm is encoded,
+ // in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44088000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sqshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1100 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+ // Destructive (reversed-operand) form: zd must alias zn (asserted), so
+ // only zm is encoded, in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x440c8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sqshlu(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // SQSHLU <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 1111 100. .... .... ....
+ // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> |
+ // imm3<7:5> | Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted); the left-shift amount is
+ // range-checked and encoded against the lane width.
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040f8000);
+}
+
+void Assembler::sqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1010 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted), so only zm is encoded,
+ // in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x441a8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sqsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SQSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1110 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+ // Destructive (reversed-operand) form: zd must alias zn (asserted), so
+ // only zm is encoded, in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x441e8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sqxtnb(const ZRegister& zd, const ZRegister& zn) {
+ // SQXTNB <Zd>.<T>, <Zn>.<Tb>
+ // 0100 0101 0.1. .000 0100 00.. .... ....
+ // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Narrowing: zd lanes are half the zn lane width (asserted below).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2));
+
+ // XTN instructions look like immediate shifts with zero shift distance.
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, size, 0x45204000);
+}
+
+void Assembler::sqxtnt(const ZRegister& zd, const ZRegister& zn) {
+ // SQXTNT <Zd>.<T>, <Zn>.<Tb>
+ // 0100 0101 0.1. .000 0100 01.. .... ....
+ // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Narrowing: zd lanes are half the zn lane width (asserted below).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2));
+
+ // XTN instructions look like immediate shifts with zero shift distance.
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, size, 0x45204400);
+}
+
+void Assembler::sqxtunb(const ZRegister& zd, const ZRegister& zn) {
+ // SQXTUNB <Zd>.<T>, <Zn>.<Tb>
+ // 0100 0101 0.1. .000 0101 00.. .... ....
+ // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Narrowing: zd lanes are half the zn lane width (asserted below).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2));
+
+ // XTN instructions look like immediate shifts with zero shift distance.
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, size, 0x45205000);
+}
+
+void Assembler::sqxtunt(const ZRegister& zd, const ZRegister& zn) {
+ // SQXTUNT <Zd>.<T>, <Zn>.<Tb>
+ // 0100 0101 0.1. .000 0101 01.. .... ....
+ // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Narrowing: zd lanes are half the zn lane width (asserted below).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2));
+
+ // XTN instructions look like immediate shifts with zero shift distance.
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, size, 0x45205400);
+}
+
+void Assembler::srhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SRHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0100 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted), so only zm is encoded,
+ // in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44148000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::sri(const ZRegister& zd, const ZRegister& zn, int shift) {
+ // SRI <Zd>.<T>, <Zn>.<T>, #<const>
+ // 0100 0101 ..0. .... 1111 00.. .... ....
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | op<10> | Zn<9:5> | Zd<4:0>
+ // The right-shift amount is range-checked and encoded against the lane
+ // width as tsz:imm3.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
+
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500f000);
+}
+
+void Assembler::srshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 0010 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted), so only zm is encoded,
+ // in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44028000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::srshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 0110 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+ // Destructive (reversed-operand) form: zd must alias zn (asserted), so
+ // only zm is encoded, in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44068000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::srshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // SRSHR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 1100 100. .... .... ....
+ // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> |
+ // imm3<7:5> | Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted); the right-shift amount is
+ // range-checked and encoded against the lane width.
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040c8000);
+}
+
+void Assembler::srsra(const ZRegister& zda, const ZRegister& zn, int shift) {
+ // SRSRA <Zda>.<T>, <Zn>.<T>, #<const>
+ // 0100 0101 ..0. .... 1110 10.. .... ....
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> |
+ // Zda<4:0>
+ // Accumulating form (zda is both read and written); shift amount is
+ // range-checked against the lane width.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn));
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits());
+
+ SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e800);
+}
+
+void Assembler::sshllb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ // SSHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+ // 0100 0101 0.0. .... 1010 00.. .... ....
+ // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Widening: zd lanes are twice the zn lane width (asserted); the shift is
+ // validated against the narrower source lane width.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a000);
+}
+
+void Assembler::sshllt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ // SSHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+ // 0100 0101 0.0. .... 1010 01.. .... ....
+ // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Widening: zd lanes are twice the zn lane width (asserted); the shift is
+ // validated against the narrower source lane width.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a400);
+}
+
+void Assembler::ssra(const ZRegister& zda, const ZRegister& zn, int shift) {
+ // SSRA <Zda>.<T>, <Zn>.<T>, #<const>
+ // 0100 0101 ..0. .... 1110 00.. .... ....
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> |
+ // Zda<4:0>
+ // Accumulating form (zda is both read and written); shift amount is
+ // range-checked against the lane width.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn));
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits());
+
+ SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e000);
+}
+
+void Assembler::ssublb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SSUBLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0001 00.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45001000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::ssublbt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SSUBLBT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 1000 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS());
+
+ Emit(0x45008800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::ssublt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SSUBLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0001 01.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45001400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::ssubltb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SSUBLTB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 1000 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS());
+
+ Emit(0x45008c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::ssubwb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SSUBWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0101 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Wide form: only zm is narrow; zd/zn lanes are twice zm's (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45005000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::ssubwt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SSUBWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0101 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Wide form: only zm is narrow; zd/zn lanes are twice zm's (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45005400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+#if 0
+// NOTE(review): this whole region is compiled out. These non-temporal
+// scatter-store forms (STNT1B/D/H/W with a vector base and scalar offset)
+// appear unfinished — presumably they are implemented elsewhere or were
+// deferred; confirm before deleting or enabling.
+// This prototype maps to 2 instruction encodings:
+// stnt1b_z_p_ar_d_64_unscaled
+// stnt1b_z_p_ar_s_x32_unscaled
+void Assembler::stnt1b(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
+ // STNT1B { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
+ // 1110 0100 000. .... 001. .... .... ....
+ // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Emit(0xe4002000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
+}
+
+void Assembler::stnt1d(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
+ // STNT1D { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
+ // 1110 0101 100. .... 001. .... .... ....
+ // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Emit(0xe5802000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// stnt1h_z_p_ar_d_64_unscaled
+// stnt1h_z_p_ar_s_x32_unscaled
+void Assembler::stnt1h(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
+ // STNT1H { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
+ // 1110 0100 100. .... 001. .... .... ....
+ // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Emit(0xe4802000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
+}
+
+// This prototype maps to 2 instruction encodings:
+// stnt1w_z_p_ar_d_64_unscaled
+// stnt1w_z_p_ar_s_x32_unscaled
+void Assembler::stnt1w(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) {
+ // STNT1W { <Zt>.D }, <Pg>, [<Zn>.D{, <Xm>}]
+ // 1110 0101 000. .... 001. .... .... ....
+ // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+
+ Emit(0xe5002000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm));
+}
+#endif
+
+void Assembler::subhnb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SUBHNB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0111 00.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Narrowing: zd lanes are half of zn/zm; note size<23:22> is taken from
+ // the wide source zn, not from zd.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45207000 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::subhnt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SUBHNT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..1. .... 0111 01.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0>
+ // Narrowing: zd lanes are half of zn/zm; note size<23:22> is taken from
+ // the wide source zn, not from zd.
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45207400 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::suqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // SUQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1100 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+ // Destructive form: zd must alias zn (asserted), so only zm is encoded,
+ // in the Zm<9:5> field (hence Rn(zm)).
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x441c8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::tbl(const ZRegister& zd,
+ const ZRegister& zn1,
+ const ZRegister& zn2,
+ const ZRegister& zm) {
+ // TBL <Zd>.<T>, { <Zn1>.<T>, <Zn2>.<T> }, <Zm>.<T>
+ // 0000 0101 ..1. .... 0010 10.. .... ....
+ // size<23:22> | Zm<20:16> | op<10> | Zn<9:5> | Zd<4:0>
+ // Two-source table lookup. Only zn1 is encoded, in the single Zn<9:5>
+ // field; zn2 is implicit (zn1 + 1), which AreConsecutive() enforces.
+ // OR-ing Rn(zn2) as well would corrupt the Zn field whenever the two
+ // register codes differ, so zn2 must not appear in the Emit() below.
+
+ USE(zn2);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreConsecutive(zn1, zn2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn1, zn2, zm));
+
+ Emit(0x05202800 | SVESize(zd) | Rd(zd) | Rn(zn1) | Rm(zm));
+}
+
+void Assembler::tbx(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // TBX <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0101 ..1. .... 0010 11.. .... ....
+ // size<23:22> | Zm<20:16> | op<10> | Zn<9:5> | Zd<4:0>
+ // Single-source table lookup; all operands share one lane size (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x05202c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uaba(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UABA <Zda>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0100 0101 ..0. .... 1111 11.. .... ....
+ // size<23:22> | Zm<20:16> | U<10> | Zn<9:5> | Zda<4:0>
+ // Accumulating form; all operands share one lane size (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Emit(0x4500fc00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uabalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UABALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 1100 10.. .... ....
+ // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // Widening accumulate: zda lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x4500c800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uabalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UABALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 1100 11.. .... ....
+ // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+ // Widening accumulate: zda lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x4500cc00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uabdlb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UABDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0011 10.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45003800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uabdlt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UABDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0011 11.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+ // Widening: zd lanes are twice the zn/zm lane width (asserted).
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45003c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uadalp(const ZRegister& zda,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // UADALP <Zda>.<T>, <Pg>/M, <Zn>.<Tb>
+ // 0100 0100 ..00 0101 101. .... .... ....
+ // size<23:22> | U<16> | Pg<12:10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x4405a000 | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::uaddlb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UADDLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0000 10.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45000800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uaddlt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UADDLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0000 11.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45000c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uaddwb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UADDWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0100 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45004800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uaddwt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UADDWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0100 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45004c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0001 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44118000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uhsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UHSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0011 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44138000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uhsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UHSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0111 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44178000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::umaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0101 101. .... .... ....
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4415a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0111 101. .... .... ....
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x4417a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::umlalb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMLALB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0100 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44004800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::umlalt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMLALT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0100 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44004c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::umlslb(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMLSLB <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0101 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44005800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::umlslt(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMLSLT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0100 ..0. .... 0101 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zda.IsLaneSizeB());
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Emit(0x44005c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::umulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMULH <Zd>.<T>, <Zn>.<T>, <Zm>.<T>
+ // 0000 0100 ..1. .... 0110 11.. .... ....
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x04206c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::umullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMULLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0111 10.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+
+ Emit(0x45007800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::umullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UMULLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0111 11.. .... ....
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2);
+
+ Emit(0x45007c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::uqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1001 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44198000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1011 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x440b8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uqrshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1111 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x440f8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // UQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 0111 100. .... .... ....
+ // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> |
+ // imm3<7:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x04078000);
+}
+
+void Assembler::uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1001 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44098000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uqshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 1101 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x440d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1011 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x441b8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uqsubr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // UQSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1111 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x441f8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::uqxtnb(const ZRegister& zd, const ZRegister& zn) {
+ // UQXTNB <Zd>.<T>, <Zn>.<Tb>
+ // 0100 0101 0.1. .000 0100 10.. .... ....
+ // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2));
+
+ // XTN instructions look like immediate shifts with zero shift distance.
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, size, 0x45204800);
+}
+
+void Assembler::uqxtnt(const ZRegister& zd, const ZRegister& zn) {
+ // UQXTNT <Zd>.<T>, <Zn>.<Tb>
+ // 0100 0101 0.1. .000 0100 11.. .... ....
+ // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2));
+
+ // XTN instructions look like immediate shifts with zero shift distance.
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, size, 0x45204c00);
+}
+
+void Assembler::urecpe(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // URECPE <Zd>.S, <Pg>/M, <Zn>.S
+ // 0100 0100 ..00 0000 101. .... .... ....
+ // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeS());
+
+ Emit(0x4400a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::urhadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // URHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 0101 100. .... .... ....
+ // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44158000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::urshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // URSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 0011 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44038000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::urshlr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // URSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..00 0111 100. .... .... ....
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> |
+ // Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x44078000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::urshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ // URSHR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, #<const>
+ // 0000 0100 ..00 1101 100. .... .... ....
+ // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> |
+ // imm3<7:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040d8000);
+}
+
+void Assembler::ursqrte(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn) {
+ // URSQRTE <Zd>.S, <Pg>/M, <Zn>.S
+ // 0100 0100 ..00 0001 101. .... .... ....
+ // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeS());
+
+ Emit(0x4401a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn));
+}
+
+void Assembler::ursra(const ZRegister& zda, const ZRegister& zn, int shift) {
+ // URSRA <Zda>.<T>, <Zn>.<T>, #<const>
+ // 0100 0101 ..0. .... 1110 11.. .... ....
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> |
+ // Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn));
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits());
+
+ SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500ec00);
+}
+
+void Assembler::ushllb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ // USHLLB <Zd>.<T>, <Zn>.<Tb>, #<const>
+ // 0100 0101 0.0. .... 1010 10.. .... ....
+ // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a800);
+}
+
+void Assembler::ushllt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ // USHLLT <Zd>.<T>, <Zn>.<Tb>, #<const>
+ // 0100 0101 0.0. .... 1010 11.. .... ....
+ // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(!zd.IsLaneSizeB());
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+
+ Instr encoded_imm =
+ EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500ac00);
+}
+
+void Assembler::usqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // USQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
+ // 0100 0100 ..01 1101 100. .... .... ....
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn, zm));
+
+ Emit(0x441d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm));
+}
+
+void Assembler::usra(const ZRegister& zda, const ZRegister& zn, int shift) {
+ // USRA <Zda>.<T>, <Zn>.<T>, #<const>
+ // 0100 0101 ..0. .... 1110 01.. .... ....
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> |
+ // Zda<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn));
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits());
+
+ SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e400);
+}
+
+void Assembler::usublb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // USUBLB <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0001 10.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45001800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::usublt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // USUBLT <Zd>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0001 11.. .... ....
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> |
+ // Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45001c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::usubwb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // USUBWB <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0101 10.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45005800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::usubwt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ // USUBWT <Zd>.<T>, <Zn>.<T>, <Zm>.<Tb>
+ // 0100 0101 ..0. .... 0101 11.. .... ....
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameLaneSize(zd, zn));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2));
+ VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes);
+
+ Emit(0x45005c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::whilege(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILEGE <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 00.. ...0 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(0x25200000 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilegt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILEGT <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 00.. ...1 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(0x25200010 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilehi(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILEHI <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 10.. ...1 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(0x25200810 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilehs(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILEHS <Pd>.<T>, <R><n>, <R><m>
+ // 0010 0101 ..1. .... 000. 10.. ...0 ....
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> |
+ // Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(AreSameSizeAndType(rn, rm));
+ const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000;
+
+ Emit(0x25200800 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilerw(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILERW <Pd>.<T>, <Xn>, <Xm>
+ // 0010 0101 ..1. .... 0011 00.. ...1 ....
+ // size<23:22> | Rm<20:16> | Rn<9:5> | rw<4> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(rn.IsX() && rm.IsX());
+
+ Emit(0x25203010 | SVESize(pd) | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::whilewr(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ // WHILEWR <Pd>.<T>, <Xn>, <Xm>
+ // 0010 0101 ..1. .... 0011 00.. ...0 ....
+ // size<23:22> | Rm<20:16> | Rn<9:5> | rw<4> | Pd<3:0>
+
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(rn.IsX() && rm.IsX());
+
+ Emit(0x25203000 | SVESize(pd) | Pd(pd) | Rn(rn) | Rm(rm));
+}
+
+void Assembler::xar(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int shift) {
+ // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const>
+ // 0000 0100 ..1. .... 0011 01.. .... ....
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | Zm<9:5> | Zdn<4:0>
+
+ USE(zn);
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2));
+ VIXL_ASSERT(zd.Is(zn));
+ VIXL_ASSERT(AreSameLaneSize(zd, zm));
+
+ Instr encoded_imm =
+ EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits());
+ SVEBitwiseShiftImmediate(zd, zm, encoded_imm, 0x04203400);
+}
+
+void Assembler::fmmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT((CPUHas(CPUFeatures::kSVEF32MM) && zda.IsLaneSizeS()) ||
+ (CPUHas(CPUFeatures::kSVEF64MM) && zda.IsLaneSizeD()));
+ VIXL_ASSERT(AreSameLaneSize(zda, zn, zm));
+
+ Emit(0x6420e400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::smmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB());
+
+ Emit(0x45009800 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::usmmla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB());
+
+ Emit(0x45809800 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::ummla(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB());
+
+ Emit(0x45c09800 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::usdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB());
+
+ Emit(0x44807800 | Rd(zda) | Rn(zn) | Rm(zm));
+}
+
+void Assembler::usdot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB());
+ VIXL_ASSERT(zm.GetCode() <= 7);
+ VIXL_ASSERT(IsUint2(index));
+
+ Emit(0x44a01800 | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn));
+}
+
+void Assembler::sudot(const ZRegister& zda,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM));
+ VIXL_ASSERT(zda.IsLaneSizeS());
+ VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB());
+ VIXL_ASSERT(zm.GetCode() <= 7);
+ VIXL_ASSERT(IsUint2(index));
+
+ Emit(0x44a01c00 | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn));
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h
index d17c8894..f7512b2c 100644
--- a/src/aarch64/constants-aarch64.h
+++ b/src/aarch64/constants-aarch64.h
@@ -4404,11 +4404,11 @@ enum SVEVectorSelectOp {
SEL_z_p_zz = SVEVectorSelectFixed
};
-enum SVEVectorSplice_DestructiveOp {
- SVEVectorSplice_DestructiveFixed = 0x052C8000,
- SVEVectorSplice_DestructiveFMask = 0xFF3FE000,
- SVEVectorSplice_DestructiveMask = 0xFF3FE000,
- SPLICE_z_p_zz_des = SVEVectorSplice_DestructiveFixed
+enum SVEVectorSpliceOp {
+ SVEVectorSpliceFixed = 0x052C8000,
+ SVEVectorSpliceFMask = 0xFF3FE000,
+ SVEVectorSpliceMask = 0xFF3FE000,
+ SPLICE_z_p_zz_des = SVEVectorSpliceFixed
};
enum ReservedOp {
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index a31e010d..ae519928 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -78,11 +78,21 @@ const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);
+const IDRegister::Field AA64ISAR2::kRPRES(4);
+
+const IDRegister::Field AA64MMFR0::kECV(60);
+
const IDRegister::Field AA64MMFR1::kLO(16);
+const IDRegister::Field AA64MMFR1::kAFP(44);
const IDRegister::Field AA64MMFR2::kAT(32);
+const IDRegister::Field AA64ZFR0::kSVEver(0);
+const IDRegister::Field AA64ZFR0::kAES(4);
+const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
+const IDRegister::Field AA64ZFR0::kSHA3(32);
+const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);
@@ -168,9 +178,22 @@ CPUFeatures AA64ISAR1::GetCPUFeatures() const {
return f;
}
+CPUFeatures AA64ISAR2::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
+ return f;
+}
+
+CPUFeatures AA64MMFR0::GetCPUFeatures() const {
+ CPUFeatures f;
+ if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
+ return f;
+}
+
CPUFeatures AA64MMFR1::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
+ if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
return f;
}
@@ -187,7 +210,13 @@ CPUFeatures AA64ZFR0::GetCPUFeatures() const {
if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
+ if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
+ if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
+ if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
+ if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
+ if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
+ if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
return f;
}
@@ -262,14 +291,15 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
CPUFeatures::kPAuthGeneric,
// Bits 32-39
CPUFeatures::kDCCVADP,
- CPUFeatures::kNone, // "sve2"
- CPUFeatures::kNone, // "sveaes"
- CPUFeatures::kNone, // "svepmull"
- CPUFeatures::kNone, // "svebitperm"
- CPUFeatures::kNone, // "svesha3"
- CPUFeatures::kNone, // "svesm4"
- CPUFeatures::kFrintToFixedSizedInt,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kSVEAES,
+ CPUFeatures::kSVEPmull128,
+ CPUFeatures::kSVEBitPerm,
+ CPUFeatures::kSVESHA3,
+ CPUFeatures::kSVESM4,
+ CPUFeatures::kAXFlag,
// Bits 40-47
+ CPUFeatures::kFrintToFixedSizedInt,
CPUFeatures::kSVEI8MM,
CPUFeatures::kSVEF32MM,
CPUFeatures::kSVEF64MM,
@@ -277,9 +307,13 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
CPUFeatures::kI8MM,
CPUFeatures::kBF16,
CPUFeatures::kDGH,
- CPUFeatures::kRNG,
// Bits 48+
- CPUFeatures::kBTI};
+ CPUFeatures::kRNG,
+ CPUFeatures::kBTI,
+ CPUFeatures::kMTE,
+ CPUFeatures::kECV,
+ CPUFeatures::kAFP,
+ CPUFeatures::kRPRES};
uint64_t hwcap_low32 = getauxval(AT_HWCAP);
uint64_t hwcap_high32 = getauxval(AT_HWCAP2);
@@ -291,6 +325,10 @@ CPUFeatures CPU::InferCPUFeaturesFromOS(
for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) {
if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]);
}
+ // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
+ if (features.Has(CPUFeatures::kMTE)) {
+ features.Combine(CPUFeatures::kMTEInstructions);
+ }
#endif // VIXL_USE_LINUX_HWCAP
if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h
index 2bf1e60f..892f48f2 100644
--- a/src/aarch64/cpu-aarch64.h
+++ b/src/aarch64/cpu-aarch64.h
@@ -160,6 +160,26 @@ class AA64ISAR1 : public IDRegister {
static const Field kI8MM;
};
+class AA64ISAR2 : public IDRegister {
+ public:
+ explicit AA64ISAR2(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kRPRES;
+};
+
+class AA64MMFR0 : public IDRegister {
+ public:
+ explicit AA64MMFR0(uint64_t value) : IDRegister(value) {}
+
+ CPUFeatures GetCPUFeatures() const;
+
+ private:
+ static const Field kECV;
+};
+
class AA64MMFR1 : public IDRegister {
public:
explicit AA64MMFR1(uint64_t value) : IDRegister(value) {}
@@ -168,6 +188,7 @@ class AA64MMFR1 : public IDRegister {
private:
static const Field kLO;
+ static const Field kAFP;
};
class AA64MMFR2 : public IDRegister {
@@ -187,7 +208,12 @@ class AA64ZFR0 : public IDRegister {
CPUFeatures GetCPUFeatures() const;
private:
+ static const Field kSVEver;
+ static const Field kAES;
+ static const Field kBitPerm;
static const Field kBF16;
+ static const Field kSHA3;
+ static const Field kSM4;
static const Field kI8MM;
static const Field kF32MM;
static const Field kF64MM;
@@ -255,9 +281,11 @@ class CPU {
V(AA64PFR1, "ID_AA64PFR1_EL1") \
V(AA64ISAR0, "ID_AA64ISAR0_EL1") \
V(AA64ISAR1, "ID_AA64ISAR1_EL1") \
+ V(AA64MMFR0, "ID_AA64MMFR0_EL1") \
V(AA64MMFR1, "ID_AA64MMFR1_EL1") \
/* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
/* read them, but some compilers don't accept the symbolic names. */ \
+ V(AA64ISAR2, "S3_0_C0_C6_2") \
V(AA64MMFR2, "S3_0_C0_C7_2") \
V(AA64ZFR0, "S3_0_C0_C4_4")
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index abe63d39..63249b04 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -34,6 +34,43 @@
namespace vixl {
namespace aarch64 {
+
+const CPUFeaturesAuditor::FormToVisitorFnMap*
+CPUFeaturesAuditor::GetFormToVisitorFnMap() {
+ static const FormToVisitorFnMap form_to_visitor = {
+ DEFAULT_FORM_TO_VISITOR_MAP(CPUFeaturesAuditor),
+ SIM_AUD_VISITOR_MAP(CPUFeaturesAuditor),
+ {"fcmla_asimdelem_c_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fcmla_asimdelem_c_s"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmlal2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmlal_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmla_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmla_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmlsl2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmlsl_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmls_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmls_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmulx_asimdelem_rh_h"_h,
+ &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmulx_asimdelem_r_sd"_h,
+ &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmul_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"fmul_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"sdot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"smlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"smlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"smull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"sqdmlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"sqdmlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"sqdmull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"udot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"umlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"umlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ {"umull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement},
+ };
+ return &form_to_visitor;
+}
+
// Every instruction must update last_instruction_, even if only to clear it,
// and every instruction must also update seen_ once it has been fully handled.
// This scope makes that simple, and allows early returns in the decode logic.
@@ -1186,8 +1223,8 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) {
V(SVEIntMulImm_Unpredicated) \
V(SVEIntMulVectors_Predicated) \
V(SVELoadAndBroadcastElement) \
- V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \
- V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \
+ V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
+ V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
V(SVELoadMultipleStructures_ScalarPlusImm) \
V(SVELoadMultipleStructures_ScalarPlusScalar) \
V(SVELoadPredicateRegister) \
@@ -1214,7 +1251,7 @@ void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) {
V(SVETableLookup) \
V(SVEUnpackPredicateElements) \
V(SVEUnpackVectorElements) \
- V(SVEVectorSplice_Destructive)
+ V(SVEVectorSplice)
#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \
void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \
@@ -1352,6 +1389,351 @@ void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) {
USE(instr);
}
+void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
+ VIXL_ASSERT(metadata->count("form") > 0);
+ const std::string& form = (*metadata)["form"];
+ uint32_t form_hash = Hash(form.c_str());
+ const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap();
+ FormToVisitorFnMap::const_iterator it = fv->find(form_hash);
+ if (it == fv->end()) {
+ RecordInstructionFeaturesScope scope(this);
+ std::map<uint32_t, const CPUFeatures> features = {
+ {"adclb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"adclt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"addhnb_z_zz"_h, CPUFeatures::kSVE2},
+ {"addhnt_z_zz"_h, CPUFeatures::kSVE2},
+ {"addp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"bcax_z_zzz"_h, CPUFeatures::kSVE2},
+ {"bdep_z_zz"_h,
+ CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+ {"bext_z_zz"_h,
+ CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+ {"bgrp_z_zz"_h,
+ CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+ {"bsl1n_z_zzz"_h, CPUFeatures::kSVE2},
+ {"bsl2n_z_zzz"_h, CPUFeatures::kSVE2},
+ {"bsl_z_zzz"_h, CPUFeatures::kSVE2},
+ {"cadd_z_zz"_h, CPUFeatures::kSVE2},
+ {"cdot_z_zzz"_h, CPUFeatures::kSVE2},
+ {"cdot_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"cdot_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"cmla_z_zzz"_h, CPUFeatures::kSVE2},
+ {"cmla_z_zzzi_h"_h, CPUFeatures::kSVE2},
+ {"cmla_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"eor3_z_zzz"_h, CPUFeatures::kSVE2},
+ {"eorbt_z_zz"_h, CPUFeatures::kSVE2},
+ {"eortb_z_zz"_h, CPUFeatures::kSVE2},
+ {"ext_z_zi_con"_h, CPUFeatures::kSVE2},
+ {"faddp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"fcvtlt_z_p_z_h2s"_h, CPUFeatures::kSVE2},
+ {"fcvtlt_z_p_z_s2d"_h, CPUFeatures::kSVE2},
+ {"fcvtnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+ {"fcvtnt_z_p_z_s2h"_h, CPUFeatures::kSVE2},
+ {"fcvtx_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+ {"fcvtxnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+ {"flogb_z_p_z"_h, CPUFeatures::kSVE2},
+ {"fmaxnmp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"fmaxp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"fminnmp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"fminp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"fmlalb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"fmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"fmlalt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"fmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"fmlslb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"fmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"fmlslt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"fmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"histcnt_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"histseg_z_zz"_h, CPUFeatures::kSVE2},
+ {"ldnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1sb_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1sh_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1sw_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"ldnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"match_p_p_zz"_h, CPUFeatures::kSVE2},
+ {"mla_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"mla_z_zzzi_h"_h, CPUFeatures::kSVE2},
+ {"mla_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"mls_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"mls_z_zzzi_h"_h, CPUFeatures::kSVE2},
+ {"mls_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"mul_z_zz"_h, CPUFeatures::kSVE2},
+ {"mul_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"mul_z_zzi_h"_h, CPUFeatures::kSVE2},
+ {"mul_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"nbsl_z_zzz"_h, CPUFeatures::kSVE2},
+ {"nmatch_p_p_zz"_h, CPUFeatures::kSVE2},
+ {"pmul_z_zz"_h, CPUFeatures::kSVE2},
+ {"pmullb_z_zz"_h, CPUFeatures::kSVE2},
+ {"pmullt_z_zz"_h, CPUFeatures::kSVE2},
+ {"raddhnb_z_zz"_h, CPUFeatures::kSVE2},
+ {"raddhnt_z_zz"_h, CPUFeatures::kSVE2},
+ {"rshrnb_z_zi"_h, CPUFeatures::kSVE2},
+ {"rshrnt_z_zi"_h, CPUFeatures::kSVE2},
+ {"rsubhnb_z_zz"_h, CPUFeatures::kSVE2},
+ {"rsubhnt_z_zz"_h, CPUFeatures::kSVE2},
+ {"saba_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sabalb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sabalt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sabdlb_z_zz"_h, CPUFeatures::kSVE2},
+ {"sabdlt_z_zz"_h, CPUFeatures::kSVE2},
+ {"sadalp_z_p_z"_h, CPUFeatures::kSVE2},
+ {"saddlb_z_zz"_h, CPUFeatures::kSVE2},
+ {"saddlbt_z_zz"_h, CPUFeatures::kSVE2},
+ {"saddlt_z_zz"_h, CPUFeatures::kSVE2},
+ {"saddwb_z_zz"_h, CPUFeatures::kSVE2},
+ {"saddwt_z_zz"_h, CPUFeatures::kSVE2},
+ {"sbclb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sbclt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"shadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"shrnb_z_zi"_h, CPUFeatures::kSVE2},
+ {"shrnt_z_zi"_h, CPUFeatures::kSVE2},
+ {"shsub_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"shsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sli_z_zzi"_h, CPUFeatures::kSVE2},
+ {"smaxp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sminp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"smlalb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"smlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"smlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"smlalt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"smlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"smlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"smlslb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"smlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"smlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"smlslt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"smlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"smlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"smulh_z_zz"_h, CPUFeatures::kSVE2},
+ {"smullb_z_zz"_h, CPUFeatures::kSVE2},
+ {"smullb_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"smullb_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"smullt_z_zz"_h, CPUFeatures::kSVE2},
+ {"smullt_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"smullt_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"splice_z_p_zz_con"_h, CPUFeatures::kSVE2},
+ {"sqabs_z_p_z"_h, CPUFeatures::kSVE2},
+ {"sqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sqcadd_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqdmlalb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqdmlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"sqdmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"sqdmlalbt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqdmlalt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqdmlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"sqdmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"sqdmlslb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqdmlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"sqdmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"sqdmlslbt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqdmlslt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqdmlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"sqdmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"sqdmulh_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"sqdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
+ {"sqdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"sqdmullb_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqdmullb_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"sqdmullb_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"sqdmullt_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqdmullt_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"sqdmullt_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"sqneg_z_p_z"_h, CPUFeatures::kSVE2},
+ {"sqrdcmlah_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqrdcmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
+ {"sqrdcmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"sqrdmlah_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqrdmlah_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"sqrdmlah_z_zzzi_h"_h, CPUFeatures::kSVE2},
+ {"sqrdmlah_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"sqrdmlsh_z_zzz"_h, CPUFeatures::kSVE2},
+ {"sqrdmlsh_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"sqrdmlsh_z_zzzi_h"_h, CPUFeatures::kSVE2},
+ {"sqrdmlsh_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"sqrdmulh_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqrdmulh_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"sqrdmulh_z_zzi_h"_h, CPUFeatures::kSVE2},
+ {"sqrdmulh_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"sqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqrshrunb_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqrshrunt_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqshl_z_p_zi"_h, CPUFeatures::kSVE2},
+ {"sqshl_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sqshlu_z_p_zi"_h, CPUFeatures::kSVE2},
+ {"sqshrnb_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqshrnt_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqshrunb_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqshrunt_z_zi"_h, CPUFeatures::kSVE2},
+ {"sqsub_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sqxtnb_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqxtnt_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqxtunb_z_zz"_h, CPUFeatures::kSVE2},
+ {"sqxtunt_z_zz"_h, CPUFeatures::kSVE2},
+ {"srhadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"sri_z_zzi"_h, CPUFeatures::kSVE2},
+ {"srshl_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"srshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"srshr_z_p_zi"_h, CPUFeatures::kSVE2},
+ {"srsra_z_zi"_h, CPUFeatures::kSVE2},
+ {"sshllb_z_zi"_h, CPUFeatures::kSVE2},
+ {"sshllt_z_zi"_h, CPUFeatures::kSVE2},
+ {"ssra_z_zi"_h, CPUFeatures::kSVE2},
+ {"ssublb_z_zz"_h, CPUFeatures::kSVE2},
+ {"ssublbt_z_zz"_h, CPUFeatures::kSVE2},
+ {"ssublt_z_zz"_h, CPUFeatures::kSVE2},
+ {"ssubltb_z_zz"_h, CPUFeatures::kSVE2},
+ {"ssubwb_z_zz"_h, CPUFeatures::kSVE2},
+ {"ssubwt_z_zz"_h, CPUFeatures::kSVE2},
+ {"stnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"stnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"stnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"stnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"stnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"stnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+ {"stnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+ {"subhnb_z_zz"_h, CPUFeatures::kSVE2},
+ {"subhnt_z_zz"_h, CPUFeatures::kSVE2},
+ {"suqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"tbl_z_zz_2"_h, CPUFeatures::kSVE2},
+ {"tbx_z_zz"_h, CPUFeatures::kSVE2},
+ {"uaba_z_zzz"_h, CPUFeatures::kSVE2},
+ {"uabalb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"uabalt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"uabdlb_z_zz"_h, CPUFeatures::kSVE2},
+ {"uabdlt_z_zz"_h, CPUFeatures::kSVE2},
+ {"uadalp_z_p_z"_h, CPUFeatures::kSVE2},
+ {"uaddlb_z_zz"_h, CPUFeatures::kSVE2},
+ {"uaddlt_z_zz"_h, CPUFeatures::kSVE2},
+ {"uaddwb_z_zz"_h, CPUFeatures::kSVE2},
+ {"uaddwt_z_zz"_h, CPUFeatures::kSVE2},
+ {"uhadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uhsub_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uhsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"umaxp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uminp_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"umlalb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"umlalb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"umlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"umlalt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"umlalt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"umlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"umlslb_z_zzz"_h, CPUFeatures::kSVE2},
+ {"umlslb_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"umlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"umlslt_z_zzz"_h, CPUFeatures::kSVE2},
+ {"umlslt_z_zzzi_d"_h, CPUFeatures::kSVE2},
+ {"umlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+ {"umulh_z_zz"_h, CPUFeatures::kSVE2},
+ {"umullb_z_zz"_h, CPUFeatures::kSVE2},
+ {"umullb_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"umullb_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"umullt_z_zz"_h, CPUFeatures::kSVE2},
+ {"umullt_z_zzi_d"_h, CPUFeatures::kSVE2},
+ {"umullt_z_zzi_s"_h, CPUFeatures::kSVE2},
+ {"uqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uqrshl_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uqrshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uqrshrnb_z_zi"_h, CPUFeatures::kSVE2},
+ {"uqrshrnt_z_zi"_h, CPUFeatures::kSVE2},
+ {"uqshl_z_p_zi"_h, CPUFeatures::kSVE2},
+ {"uqshl_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uqshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uqshrnb_z_zi"_h, CPUFeatures::kSVE2},
+ {"uqshrnt_z_zi"_h, CPUFeatures::kSVE2},
+ {"uqsub_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uqsubr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"uqxtnb_z_zz"_h, CPUFeatures::kSVE2},
+ {"uqxtnt_z_zz"_h, CPUFeatures::kSVE2},
+ {"urecpe_z_p_z"_h, CPUFeatures::kSVE2},
+ {"urhadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"urshl_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"urshlr_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"urshr_z_p_zi"_h, CPUFeatures::kSVE2},
+ {"ursqrte_z_p_z"_h, CPUFeatures::kSVE2},
+ {"ursra_z_zi"_h, CPUFeatures::kSVE2},
+ {"ushllb_z_zi"_h, CPUFeatures::kSVE2},
+ {"ushllt_z_zi"_h, CPUFeatures::kSVE2},
+ {"usqadd_z_p_zz"_h, CPUFeatures::kSVE2},
+ {"usra_z_zi"_h, CPUFeatures::kSVE2},
+ {"usublb_z_zz"_h, CPUFeatures::kSVE2},
+ {"usublt_z_zz"_h, CPUFeatures::kSVE2},
+ {"usubwb_z_zz"_h, CPUFeatures::kSVE2},
+ {"usubwt_z_zz"_h, CPUFeatures::kSVE2},
+ {"whilege_p_p_rr"_h, CPUFeatures::kSVE2},
+ {"whilegt_p_p_rr"_h, CPUFeatures::kSVE2},
+ {"whilehi_p_p_rr"_h, CPUFeatures::kSVE2},
+ {"whilehs_p_p_rr"_h, CPUFeatures::kSVE2},
+ {"whilerw_p_rr"_h, CPUFeatures::kSVE2},
+ {"whilewr_p_rr"_h, CPUFeatures::kSVE2},
+ {"xar_z_zzi"_h, CPUFeatures::kSVE2},
+ {"smmla_z_zzz"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+ {"ummla_z_zzz"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+ {"usmmla_z_zzz"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+ {"fmmla_z_zzz_s"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM)},
+ {"fmmla_z_zzz_d"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"smmla_asimdsame2_g"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+ {"ummla_asimdsame2_g"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+ {"usmmla_asimdsame2_g"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+ {"ld1row_z_p_bi_u32"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"ld1row_z_p_br_contiguous"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"ld1rod_z_p_bi_u64"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"ld1rod_z_p_br_contiguous"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"ld1rob_z_p_bi_u8"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"ld1rob_z_p_br_contiguous"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"ld1roh_z_p_bi_u16"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"ld1roh_z_p_br_contiguous"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)},
+ {"usdot_asimdsame2_d"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+ {"sudot_asimdelem_d"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+ {"usdot_asimdelem_d"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)},
+ {"usdot_z_zzz_s"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+ {"usdot_z_zzzi_s"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+ {"sudot_z_zzzi_s"_h,
+ CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)},
+ };
+
+ if (features.count(form_hash) > 0) {
+ scope.Record(features[form_hash]);
+ }
+ } else {
+ (it->second)(this, instr);
+ }
+}
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/cpu-features-auditor-aarch64.h b/src/aarch64/cpu-features-auditor-aarch64.h
index 23aec066..041bc88e 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.h
+++ b/src/aarch64/cpu-features-auditor-aarch64.h
@@ -27,10 +27,13 @@
#ifndef VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
#define VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_
+#include <functional>
#include <iostream>
+#include <unordered_map>
#include "cpu-features.h"
#include "decoder-aarch64.h"
+#include "decoder-visitor-map-aarch64.h"
namespace vixl {
namespace aarch64 {
@@ -100,15 +103,16 @@ class CPUFeaturesAuditor : public DecoderVisitor {
SetAvailableFeatures(available);
}
-// Declare all Visitor functions.
-#define DECLARE(A) \
- virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
- VISITOR_LIST(DECLARE)
-#undef DECLARE
+ virtual void Visit(Metadata* metadata,
+ const Instruction* instr) VIXL_OVERRIDE;
private:
class RecordInstructionFeaturesScope;
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
+ VISITOR_LIST(DECLARE)
+#undef DECLARE
+
void LoadStoreHelper(const Instruction* instr);
void LoadStorePairHelper(const Instruction* instr);
@@ -117,6 +121,11 @@ class CPUFeaturesAuditor : public DecoderVisitor {
CPUFeatures available_;
Decoder* decoder_;
+
+ using FormToVisitorFnMap = std::unordered_map<
+ uint32_t,
+ std::function<void(CPUFeaturesAuditor*, const Instruction*)>>;
+ static const FormToVisitorFnMap* GetFormToVisitorFnMap();
};
} // namespace aarch64
diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc
index c6859bbc..a4e2989e 100644
--- a/src/aarch64/decoder-aarch64.cc
+++ b/src/aarch64/decoder-aarch64.cc
@@ -49,7 +49,9 @@ void Decoder::Decode(Instruction* instr) {
}
void Decoder::AddDecodeNode(const DecodeNode& node) {
- decode_nodes_.insert(std::make_pair(node.GetName(), node));
+ if (decode_nodes_.count(node.GetName()) == 0) {
+ decode_nodes_.insert(std::make_pair(node.GetName(), node));
+ }
}
DecodeNode* Decoder::GetDecodeNode(std::string name) {
@@ -64,13 +66,22 @@ void Decoder::ConstructDecodeGraph() {
// Add all of the decoding nodes to the Decoder.
for (unsigned i = 0; i < ArrayLength(kDecodeMapping); i++) {
AddDecodeNode(DecodeNode(kDecodeMapping[i], this));
- }
- // Add the visitor function wrapping nodes to the Decoder.
- for (unsigned i = 0; i < ArrayLength(kVisitorNodes); i++) {
- AddDecodeNode(DecodeNode(kVisitorNodes[i], this));
+ // Add a node for each instruction form named, identified by having no '_'
+ // prefix on the node name.
+ const DecodeMapping& map = kDecodeMapping[i];
+ for (unsigned j = 0; j < map.mapping.size(); j++) {
+ if ((map.mapping[j].handler != NULL) &&
+ (map.mapping[j].handler[0] != '_')) {
+ AddDecodeNode(DecodeNode(map.mapping[j].handler, this));
+ }
+ }
}
+ // Add an "unallocated" node, used when an instruction encoding is not
+ // recognised by the decoding graph.
+ AddDecodeNode(DecodeNode("unallocated", this));
+
// Compile the graph from the root.
compiled_decoder_root_ = GetDecodeNode("Root")->Compile(this);
}
@@ -122,43 +133,18 @@ void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
visitors_.remove(visitor);
}
-#define DEFINE_VISITOR_CALLERS(A) \
- void Decoder::Visit##A(const Instruction* instr) { \
- VIXL_ASSERT(((A##FMask == 0) && (A##Fixed == 0)) || \
- (instr->Mask(A##FMask) == A##Fixed)); \
- std::list<DecoderVisitor*>::iterator it; \
- for (it = visitors_.begin(); it != visitors_.end(); it++) { \
- (*it)->Visit##A(instr); \
- } \
- }
-VISITOR_LIST(DEFINE_VISITOR_CALLERS)
-#undef DEFINE_VISITOR_CALLERS
-
-void DecodeNode::SetSampledBits(const uint8_t* bits, int bit_count) {
- VIXL_ASSERT(!IsCompiled());
-
- sampled_bits_.resize(bit_count);
- for (int i = 0; i < bit_count; i++) {
- sampled_bits_[i] = bits[i];
+void Decoder::VisitNamedInstruction(const Instruction* instr,
+ const std::string& name) {
+ std::list<DecoderVisitor*>::iterator it;
+ Metadata m = {{"form", name}};
+ for (it = visitors_.begin(); it != visitors_.end(); it++) {
+ (*it)->Visit(&m, instr);
}
}
-std::vector<uint8_t> DecodeNode::GetSampledBits() const {
- return sampled_bits_;
-}
-
-size_t DecodeNode::GetSampledBitsCount() const { return sampled_bits_.size(); }
-
-void DecodeNode::AddPatterns(const DecodePattern* patterns) {
- VIXL_ASSERT(!IsCompiled());
- for (unsigned i = 0; i < kMaxDecodeMappings; i++) {
- // Empty string indicates end of patterns.
- if (patterns[i].pattern == NULL) break;
- VIXL_ASSERT((strlen(patterns[i].pattern) == GetSampledBitsCount()) ||
- (strcmp(patterns[i].pattern, "otherwise") == 0));
- pattern_table_.push_back(patterns[i]);
- }
-}
+// Initialise empty vectors for sampled bits and pattern table.
+const std::vector<uint8_t> DecodeNode::kEmptySampledBits;
+const std::vector<DecodePattern> DecodeNode::kEmptyPatternTable;
void DecodeNode::CompileNodeForBits(Decoder* decoder,
std::string name,
@@ -172,191 +158,246 @@ void DecodeNode::CompileNodeForBits(Decoder* decoder,
compiled_node_->SetNodeForBits(bits, n->GetCompiledNode());
}
-BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask) {
- // Instantiate a templated bit extraction function for every pattern we
- // might encounter. If the assertion in the default clause is reached, add a
- // new instantiation below using the information in the failure message.
- BitExtractFn bit_extract_fn = NULL;
- switch (mask) {
-#define INSTANTIATE_TEMPLATE(M) \
- case M: \
- bit_extract_fn = &Instruction::ExtractBits<M>; \
+
+#define INSTANTIATE_TEMPLATE_M(M) \
+ case 0x##M: \
+ bit_extract_fn = &Instruction::ExtractBits<0x##M>; \
+ break;
+#define INSTANTIATE_TEMPLATE_MV(M, V) \
+ case 0x##M##V: \
+ bit_extract_fn = &Instruction::IsMaskedValue<0x##M, 0x##V>; \
break;
- INSTANTIATE_TEMPLATE(0x000001e0);
- INSTANTIATE_TEMPLATE(0x00000400);
- INSTANTIATE_TEMPLATE(0x00000800);
- INSTANTIATE_TEMPLATE(0x00000c00);
- INSTANTIATE_TEMPLATE(0x00001000);
- INSTANTIATE_TEMPLATE(0x00001800);
- INSTANTIATE_TEMPLATE(0x00001c00);
- INSTANTIATE_TEMPLATE(0x00004000);
- INSTANTIATE_TEMPLATE(0x00008000);
- INSTANTIATE_TEMPLATE(0x0000f000);
- INSTANTIATE_TEMPLATE(0x0000fc00);
- INSTANTIATE_TEMPLATE(0x00060010);
- INSTANTIATE_TEMPLATE(0x00093e00);
- INSTANTIATE_TEMPLATE(0x000c1000);
- INSTANTIATE_TEMPLATE(0x00100000);
- INSTANTIATE_TEMPLATE(0x00101800);
- INSTANTIATE_TEMPLATE(0x00140000);
- INSTANTIATE_TEMPLATE(0x00180000);
- INSTANTIATE_TEMPLATE(0x00181000);
- INSTANTIATE_TEMPLATE(0x00190000);
- INSTANTIATE_TEMPLATE(0x00191400);
- INSTANTIATE_TEMPLATE(0x001c0000);
- INSTANTIATE_TEMPLATE(0x001c1800);
- INSTANTIATE_TEMPLATE(0x001f0000);
- INSTANTIATE_TEMPLATE(0x0020fc00);
- INSTANTIATE_TEMPLATE(0x0038f000);
- INSTANTIATE_TEMPLATE(0x00400000);
- INSTANTIATE_TEMPLATE(0x00400010);
- INSTANTIATE_TEMPLATE(0x0040f000);
- INSTANTIATE_TEMPLATE(0x00500000);
- INSTANTIATE_TEMPLATE(0x00800000);
- INSTANTIATE_TEMPLATE(0x00800010);
- INSTANTIATE_TEMPLATE(0x00801800);
- INSTANTIATE_TEMPLATE(0x009f0000);
- INSTANTIATE_TEMPLATE(0x00c00000);
- INSTANTIATE_TEMPLATE(0x00c00010);
- INSTANTIATE_TEMPLATE(0x00cf8000);
- INSTANTIATE_TEMPLATE(0x00db0000);
- INSTANTIATE_TEMPLATE(0x00dc0000);
- INSTANTIATE_TEMPLATE(0x00e00003);
- INSTANTIATE_TEMPLATE(0x00f80400);
- INSTANTIATE_TEMPLATE(0x01e00000);
- INSTANTIATE_TEMPLATE(0x03800000);
- INSTANTIATE_TEMPLATE(0x04c0f000);
- INSTANTIATE_TEMPLATE(0x10800400);
- INSTANTIATE_TEMPLATE(0x1e000000);
- INSTANTIATE_TEMPLATE(0x20000000);
- INSTANTIATE_TEMPLATE(0x20000410);
- INSTANTIATE_TEMPLATE(0x20007000);
- INSTANTIATE_TEMPLATE(0x20007800);
- INSTANTIATE_TEMPLATE(0x2000f000);
- INSTANTIATE_TEMPLATE(0x2000f800);
- INSTANTIATE_TEMPLATE(0x201e0c00);
- INSTANTIATE_TEMPLATE(0x20803800);
- INSTANTIATE_TEMPLATE(0x20c0cc00);
- INSTANTIATE_TEMPLATE(0x20c0f000);
- INSTANTIATE_TEMPLATE(0x20c0f800);
- INSTANTIATE_TEMPLATE(0x20c1f000);
- INSTANTIATE_TEMPLATE(0x51e00000);
- INSTANTIATE_TEMPLATE(0x60007800);
- INSTANTIATE_TEMPLATE(0x6000f800);
- INSTANTIATE_TEMPLATE(0x601e0000);
- INSTANTIATE_TEMPLATE(0x80007c00);
- INSTANTIATE_TEMPLATE(0x80017c00);
- INSTANTIATE_TEMPLATE(0x80408000);
- INSTANTIATE_TEMPLATE(0x80a07c00);
- INSTANTIATE_TEMPLATE(0x80df0000);
- INSTANTIATE_TEMPLATE(0x80e08000);
- INSTANTIATE_TEMPLATE(0xa0c00000);
- INSTANTIATE_TEMPLATE(0xb5a00000);
- INSTANTIATE_TEMPLATE(0xc0c00c00);
- INSTANTIATE_TEMPLATE(0xc4400000);
- INSTANTIATE_TEMPLATE(0xc4c00000);
- INSTANTIATE_TEMPLATE(0xe0400000);
- INSTANTIATE_TEMPLATE(0xe120e000);
- INSTANTIATE_TEMPLATE(0xe3c00000);
- INSTANTIATE_TEMPLATE(0xf1200000);
-#undef INSTANTIATE_TEMPLATE
- default:
- printf("Node %s: No template instantiated for extracting 0x%08x.\n",
- GetName().c_str(),
- GenerateSampledBitsMask());
- printf("Add one in %s above line %d:\n", __FILE__, __LINE__);
- printf(" INSTANTIATE_TEMPLATE(0x%08x);\n", GenerateSampledBitsMask());
- VIXL_UNREACHABLE();
- }
- return bit_extract_fn;
-}
-BitExtractFn DecodeNode::GetBitExtractFunction(uint32_t mask, uint32_t value) {
+BitExtractFn DecodeNode::GetBitExtractFunctionHelper(uint32_t x, uint32_t y) {
// Instantiate a templated bit extraction function for every pattern we
- // might encounter. If the assertion in the following check fails, add a
+ // might encounter. If the assertion in the default clause is reached, add a
// new instantiation below using the information in the failure message.
- bool instantiated = false;
BitExtractFn bit_extract_fn = NULL;
-#define INSTANTIATE_TEMPLATE(M, V) \
- if ((mask == M) && (value == V)) { \
- bit_extract_fn = &Instruction::IsMaskedValue<M, V>; \
- instantiated = true; \
- }
- INSTANTIATE_TEMPLATE(0x0000001c, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00000210, 0x00000000);
- INSTANTIATE_TEMPLATE(0x000003c0, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00001c00, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00001c0f, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00003000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00007800, 0x00000000);
- INSTANTIATE_TEMPLATE(0x0000e000, 0x0000a000);
- INSTANTIATE_TEMPLATE(0x0000f000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00030400, 0x00000000);
- INSTANTIATE_TEMPLATE(0x0003801f, 0x0000000d);
- INSTANTIATE_TEMPLATE(0x00060210, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00060810, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00060a10, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00060bf0, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00061e10, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00061e10, 0x00000400);
- INSTANTIATE_TEMPLATE(0x00070200, 0x00000000);
- INSTANTIATE_TEMPLATE(0x000b1e10, 0x00000000);
- INSTANTIATE_TEMPLATE(0x000f0000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00130e1f, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00130fff, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00180000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00180000, 0x00100000);
- INSTANTIATE_TEMPLATE(0x001e0000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x001f0000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x001f0000, 0x001f0000);
- INSTANTIATE_TEMPLATE(0x0038e000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x0039e000, 0x00002000);
- INSTANTIATE_TEMPLATE(0x003ae000, 0x00002000);
- INSTANTIATE_TEMPLATE(0x003ce000, 0x00042000);
- INSTANTIATE_TEMPLATE(0x005f0000, 0x001f0000);
- INSTANTIATE_TEMPLATE(0x00780000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00870210, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00c00000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x00c00000, 0x00800000);
- INSTANTIATE_TEMPLATE(0x00c00000, 0x00c00000);
- INSTANTIATE_TEMPLATE(0x00c00010, 0x00800000);
- INSTANTIATE_TEMPLATE(0x00ca1e10, 0x00000000);
- INSTANTIATE_TEMPLATE(0x01000010, 0x00000000);
- INSTANTIATE_TEMPLATE(0x20000800, 0x00000000);
- INSTANTIATE_TEMPLATE(0x20008000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x20040000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x201e8000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x60000000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x60000000, 0x20000000);
- INSTANTIATE_TEMPLATE(0x60000000, 0x60000000);
- INSTANTIATE_TEMPLATE(0x60200000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x80008000, 0x00000000);
- INSTANTIATE_TEMPLATE(0x80008000, 0x00008000);
- INSTANTIATE_TEMPLATE(0x80400000, 0x00400000);
- INSTANTIATE_TEMPLATE(0xa00003e0, 0x00000000);
- INSTANTIATE_TEMPLATE(0xa000c007, 0x00000000);
- INSTANTIATE_TEMPLATE(0xa0100000, 0x00000000);
- INSTANTIATE_TEMPLATE(0xc4000000, 0xc0000000);
- INSTANTIATE_TEMPLATE(0xc4000000, 0xc4000000);
- INSTANTIATE_TEMPLATE(0xe0000010, 0xa0000000);
- INSTANTIATE_TEMPLATE(0xe01c0000, 0x20000000);
- INSTANTIATE_TEMPLATE(0xe1ff0000, 0x00000000);
-#undef INSTANTIATE_TEMPLATE
-
- if (!instantiated) {
- printf(
- "Node %s: no template instantiated for mask 0x%08x, value = "
- "0x%08x.\n",
- GetName().c_str(),
- mask,
- value);
- printf("Add one in %s above line %d:\n", __FILE__, __LINE__);
- printf(" INSTANTIATE_TEMPLATE(0x%08x, 0x%08x);\n", mask, value);
- VIXL_UNREACHABLE();
+
+ // The arguments x and y represent the mask and value. If y is 0, x is the
+ // mask. Otherwise, y is the mask, and x is the value to compare against a
+ // masked result.
+ uint64_t signature = (static_cast<uint64_t>(y) << 32) | x;
+ switch (signature) {
+ INSTANTIATE_TEMPLATE_M(00000001);
+ INSTANTIATE_TEMPLATE_M(00000010);
+ INSTANTIATE_TEMPLATE_M(0000001f);
+ INSTANTIATE_TEMPLATE_M(00000060);
+ INSTANTIATE_TEMPLATE_M(00000100);
+ INSTANTIATE_TEMPLATE_M(00000200);
+ INSTANTIATE_TEMPLATE_M(00000400);
+ INSTANTIATE_TEMPLATE_M(00000800);
+ INSTANTIATE_TEMPLATE_M(00000c00);
+ INSTANTIATE_TEMPLATE_M(00000c10);
+ INSTANTIATE_TEMPLATE_M(00000fc0);
+ INSTANTIATE_TEMPLATE_M(00001000);
+ INSTANTIATE_TEMPLATE_M(00001400);
+ INSTANTIATE_TEMPLATE_M(00001800);
+ INSTANTIATE_TEMPLATE_M(00001c00);
+ INSTANTIATE_TEMPLATE_M(00002000);
+ INSTANTIATE_TEMPLATE_M(00002010);
+ INSTANTIATE_TEMPLATE_M(00002400);
+ INSTANTIATE_TEMPLATE_M(00003000);
+ INSTANTIATE_TEMPLATE_M(00003020);
+ INSTANTIATE_TEMPLATE_M(00003400);
+ INSTANTIATE_TEMPLATE_M(00003800);
+ INSTANTIATE_TEMPLATE_M(00003c00);
+ INSTANTIATE_TEMPLATE_M(00013000);
+ INSTANTIATE_TEMPLATE_M(00020000);
+ INSTANTIATE_TEMPLATE_M(00020010);
+ INSTANTIATE_TEMPLATE_M(000203e0);
+ INSTANTIATE_TEMPLATE_M(000303e0);
+ INSTANTIATE_TEMPLATE_M(00060000);
+ INSTANTIATE_TEMPLATE_M(00061000);
+ INSTANTIATE_TEMPLATE_M(00070000);
+ INSTANTIATE_TEMPLATE_M(000703c0);
+ INSTANTIATE_TEMPLATE_M(00080000);
+ INSTANTIATE_TEMPLATE_M(00090000);
+ INSTANTIATE_TEMPLATE_M(000f0000);
+ INSTANTIATE_TEMPLATE_M(000f0010);
+ INSTANTIATE_TEMPLATE_M(00100000);
+ INSTANTIATE_TEMPLATE_M(00180000);
+ INSTANTIATE_TEMPLATE_M(001d1c00);
+ INSTANTIATE_TEMPLATE_M(001f0000);
+ INSTANTIATE_TEMPLATE_M(001f2000);
+ INSTANTIATE_TEMPLATE_M(001f3000);
+ INSTANTIATE_TEMPLATE_M(00400000);
+ INSTANTIATE_TEMPLATE_M(00400800);
+ INSTANTIATE_TEMPLATE_M(00403000);
+ INSTANTIATE_TEMPLATE_M(00500800);
+ INSTANTIATE_TEMPLATE_M(00583000);
+ INSTANTIATE_TEMPLATE_M(005f0000);
+ INSTANTIATE_TEMPLATE_M(00800000);
+ INSTANTIATE_TEMPLATE_M(00800400);
+ INSTANTIATE_TEMPLATE_M(00800c1e);
+ INSTANTIATE_TEMPLATE_M(0080101f);
+ INSTANTIATE_TEMPLATE_M(00801c00);
+ INSTANTIATE_TEMPLATE_M(00803000);
+ INSTANTIATE_TEMPLATE_M(00803c00);
+ INSTANTIATE_TEMPLATE_M(009f0000);
+ INSTANTIATE_TEMPLATE_M(009f2000);
+ INSTANTIATE_TEMPLATE_M(00c00000);
+ INSTANTIATE_TEMPLATE_M(00c00010);
+ INSTANTIATE_TEMPLATE_M(00c0001f);
+ INSTANTIATE_TEMPLATE_M(00c00200);
+ INSTANTIATE_TEMPLATE_M(00c00400);
+ INSTANTIATE_TEMPLATE_M(00c00c00);
+ INSTANTIATE_TEMPLATE_M(00c00c1c);
+ INSTANTIATE_TEMPLATE_M(00c01000);
+ INSTANTIATE_TEMPLATE_M(00c01400);
+ INSTANTIATE_TEMPLATE_M(00c01c00);
+ INSTANTIATE_TEMPLATE_M(00c02000);
+ INSTANTIATE_TEMPLATE_M(00c03000);
+ INSTANTIATE_TEMPLATE_M(00c03c00);
+ INSTANTIATE_TEMPLATE_M(00c83000);
+ INSTANTIATE_TEMPLATE_M(00cf0000);
+ INSTANTIATE_TEMPLATE_M(00d00200);
+ INSTANTIATE_TEMPLATE_M(00d80800);
+ INSTANTIATE_TEMPLATE_M(00d81800);
+ INSTANTIATE_TEMPLATE_M(00d81c00);
+ INSTANTIATE_TEMPLATE_M(00d82800);
+ INSTANTIATE_TEMPLATE_M(00d82c00);
+ INSTANTIATE_TEMPLATE_M(00d92400);
+ INSTANTIATE_TEMPLATE_M(00d93000);
+ INSTANTIATE_TEMPLATE_M(00db0000);
+ INSTANTIATE_TEMPLATE_M(00dc0000);
+ INSTANTIATE_TEMPLATE_M(00dc2000);
+ INSTANTIATE_TEMPLATE_M(00dd2000);
+ INSTANTIATE_TEMPLATE_M(00df0000);
+ INSTANTIATE_TEMPLATE_M(40000000);
+ INSTANTIATE_TEMPLATE_M(40000010);
+ INSTANTIATE_TEMPLATE_M(40000c00);
+ INSTANTIATE_TEMPLATE_M(40002000);
+ INSTANTIATE_TEMPLATE_M(40002010);
+ INSTANTIATE_TEMPLATE_M(40003000);
+ INSTANTIATE_TEMPLATE_M(40003c00);
+ INSTANTIATE_TEMPLATE_M(400f0000);
+ INSTANTIATE_TEMPLATE_M(400f0400);
+ INSTANTIATE_TEMPLATE_M(401f2000);
+ INSTANTIATE_TEMPLATE_M(40400800);
+ INSTANTIATE_TEMPLATE_M(40400c00);
+ INSTANTIATE_TEMPLATE_M(40403c00);
+ INSTANTIATE_TEMPLATE_M(40800000);
+ INSTANTIATE_TEMPLATE_M(40800c00);
+ INSTANTIATE_TEMPLATE_M(40802000);
+ INSTANTIATE_TEMPLATE_M(40802010);
+ INSTANTIATE_TEMPLATE_M(40803400);
+ INSTANTIATE_TEMPLATE_M(40803c00);
+ INSTANTIATE_TEMPLATE_M(40c00000);
+ INSTANTIATE_TEMPLATE_M(40c00c00);
+ INSTANTIATE_TEMPLATE_M(40c00c10);
+ INSTANTIATE_TEMPLATE_M(40c01c00);
+ INSTANTIATE_TEMPLATE_M(40c02000);
+ INSTANTIATE_TEMPLATE_M(40c02010);
+ INSTANTIATE_TEMPLATE_M(40c02c00);
+ INSTANTIATE_TEMPLATE_M(40c03c00);
+ INSTANTIATE_TEMPLATE_M(40c80000);
+ INSTANTIATE_TEMPLATE_M(40c90000);
+ INSTANTIATE_TEMPLATE_M(40cf0000);
+ INSTANTIATE_TEMPLATE_M(40d02000);
+ INSTANTIATE_TEMPLATE_M(40d02010);
+ INSTANTIATE_TEMPLATE_M(40d80000);
+ INSTANTIATE_TEMPLATE_M(40d81800);
+ INSTANTIATE_TEMPLATE_M(bf20c000);
+ INSTANTIATE_TEMPLATE_MV(00000003, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00000003, 00000003);
+ INSTANTIATE_TEMPLATE_MV(0000001f, 0000001f);
+ INSTANTIATE_TEMPLATE_MV(00000210, 00000000);
+ INSTANTIATE_TEMPLATE_MV(000003e0, 00000000);
+ INSTANTIATE_TEMPLATE_MV(000003e0, 000003e0);
+ INSTANTIATE_TEMPLATE_MV(000003e1, 000003e0);
+ INSTANTIATE_TEMPLATE_MV(000003e3, 000003e0);
+ INSTANTIATE_TEMPLATE_MV(000003e3, 000003e3);
+ INSTANTIATE_TEMPLATE_MV(00000c00, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00000fc0, 00000000);
+ INSTANTIATE_TEMPLATE_MV(000013e0, 00001000);
+ INSTANTIATE_TEMPLATE_MV(00001c00, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00002400, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00003000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00003000, 00001000);
+ INSTANTIATE_TEMPLATE_MV(00003000, 00002000);
+ INSTANTIATE_TEMPLATE_MV(00003000, 00003000);
+ INSTANTIATE_TEMPLATE_MV(00003010, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00060000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00061000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00070000, 00030000);
+ INSTANTIATE_TEMPLATE_MV(0007309f, 0000001f);
+ INSTANTIATE_TEMPLATE_MV(00073ee0, 00033060);
+ INSTANTIATE_TEMPLATE_MV(000f0000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(000f0010, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00100200, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00100210, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00160000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00170000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(001c0000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(001d0000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(001e0000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(001f0000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(001f0000, 00010000);
+ INSTANTIATE_TEMPLATE_MV(001f0000, 00100000);
+ INSTANTIATE_TEMPLATE_MV(001f0000, 001f0000);
+ INSTANTIATE_TEMPLATE_MV(001f3000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(001f3000, 001f0000);
+ INSTANTIATE_TEMPLATE_MV(001f300f, 0000000d);
+ INSTANTIATE_TEMPLATE_MV(001f301f, 0000000d);
+ INSTANTIATE_TEMPLATE_MV(001f33e0, 000103e0);
+ INSTANTIATE_TEMPLATE_MV(001f3800, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00401000, 00400000);
+ INSTANTIATE_TEMPLATE_MV(00403000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(005f3000, 001f0000);
+ INSTANTIATE_TEMPLATE_MV(005f3000, 001f1000);
+ INSTANTIATE_TEMPLATE_MV(00800010, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00800400, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00800410, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00803000, 00002000);
+ INSTANTIATE_TEMPLATE_MV(00870000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(009f0000, 00010000);
+ INSTANTIATE_TEMPLATE_MV(00c00000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00c00000, 00400000);
+ INSTANTIATE_TEMPLATE_MV(00c0001f, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00c001ff, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00c00200, 00400000);
+ INSTANTIATE_TEMPLATE_MV(00c0020f, 00400000);
+ INSTANTIATE_TEMPLATE_MV(00c003e0, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00c00800, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00d80800, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00df0000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(00df3800, 001f0800);
+ INSTANTIATE_TEMPLATE_MV(40002000, 40000000);
+ INSTANTIATE_TEMPLATE_MV(40003c00, 00000000);
+ INSTANTIATE_TEMPLATE_MV(40040000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(40800c00, 40000400);
+ INSTANTIATE_TEMPLATE_MV(40c00000, 00000000);
+ INSTANTIATE_TEMPLATE_MV(40c00000, 00400000);
+ INSTANTIATE_TEMPLATE_MV(40c00000, 40000000);
+ INSTANTIATE_TEMPLATE_MV(40c00000, 40800000);
+ INSTANTIATE_TEMPLATE_MV(40df0000, 00000000);
+ default: {
+ static bool printed_preamble = false;
+ if (!printed_preamble) {
+ printf("One or more missing template instantiations.\n");
+ printf(
+ "Add the following to either GetBitExtractFunction() "
+ "implementations\n");
+ printf("in %s near line %d:\n", __FILE__, __LINE__);
+ printed_preamble = true;
+ }
+
+ if (y == 0) {
+ printf(" INSTANTIATE_TEMPLATE_M(%08x);\n", x);
+ bit_extract_fn = &Instruction::ExtractBitsAbsent;
+ } else {
+ printf(" INSTANTIATE_TEMPLATE_MV(%08x, %08x);\n", y, x);
+ bit_extract_fn = &Instruction::IsMaskedValueAbsent;
+ }
+ }
}
return bit_extract_fn;
}
+#undef INSTANTIATE_TEMPLATE_M
+#undef INSTANTIATE_TEMPLATE_MV
+
bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) {
// EitherOr optimisation: if there are only one or two patterns in the table,
// try to optimise the node to exploit that.
@@ -364,21 +405,22 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) {
if ((table_size <= 2) && (GetSampledBitsCount() > 1)) {
// TODO: support 'x' in this optimisation by dropping the sampled bit
// positions before making the mask/value.
- if ((strchr(pattern_table_[0].pattern, 'x') == NULL) &&
- ((table_size == 1) ||
- (strcmp(pattern_table_[1].pattern, "otherwise") == 0))) {
+ if (!PatternContainsSymbol(pattern_table_[0].pattern,
+ PatternSymbol::kSymbolX) &&
+ (table_size == 1)) {
// A pattern table consisting of a fixed pattern with no x's, and an
// "otherwise" or absent case. Optimise this into an instruction mask and
// value test.
uint32_t single_decode_mask = 0;
uint32_t single_decode_value = 0;
- std::vector<uint8_t> bits = GetSampledBits();
+ const std::vector<uint8_t>& bits = GetSampledBits();
// Construct the instruction mask and value from the pattern.
- VIXL_ASSERT(bits.size() == strlen(pattern_table_[0].pattern));
+ VIXL_ASSERT(bits.size() == GetPatternLength(pattern_table_[0].pattern));
for (size_t i = 0; i < bits.size(); i++) {
single_decode_mask |= 1U << bits[i];
- if (pattern_table_[0].pattern[i] == '1') {
+ if (GetSymbolAt(pattern_table_[0].pattern, i) ==
+ PatternSymbol::kSymbol1) {
single_decode_value |= 1U << bits[i];
}
}
@@ -391,9 +433,7 @@ bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) {
// Set DecodeNode for when the instruction after masking doesn't match the
// value.
- const char* doesnt_match_handler =
- (table_size == 1) ? "VisitUnallocated" : pattern_table_[1].handler;
- CompileNodeForBits(decoder, doesnt_match_handler, 0);
+ CompileNodeForBits(decoder, "unallocated", 0);
// Set DecodeNode for when it does match.
CompileNodeForBits(decoder, pattern_table_[0].handler, 1);
@@ -411,21 +451,14 @@ CompiledDecodeNode* DecodeNode::Compile(Decoder* decoder) {
CreateVisitorNode();
} else if (!TryCompileOptimisedDecodeTable(decoder)) {
// The "otherwise" node is the default next node if no pattern matches.
- std::string otherwise = "VisitUnallocated";
+ std::string otherwise = "unallocated";
// For each pattern in pattern_table_, create an entry in matches that
// has a corresponding mask and value for the pattern.
std::vector<MaskValuePair> matches;
for (size_t i = 0; i < pattern_table_.size(); i++) {
- if (strcmp(pattern_table_[i].pattern, "otherwise") == 0) {
- // "otherwise" must be the last pattern in the list, otherwise the
- // indices won't match for pattern_table_ and matches.
- VIXL_ASSERT(i == pattern_table_.size() - 1);
- otherwise = pattern_table_[i].handler;
- } else {
- matches.push_back(GenerateMaskValuePair(
- GenerateOrderedPattern(pattern_table_[i].pattern)));
- }
+ matches.push_back(GenerateMaskValuePair(
+ GenerateOrderedPattern(pattern_table_[i].pattern)));
}
BitExtractFn bit_extract_fn =
@@ -466,7 +499,7 @@ void CompiledDecodeNode::Decode(const Instruction* instr) const {
if (IsLeafNode()) {
// If this node is a leaf, call the registered visitor function.
VIXL_ASSERT(decoder_ != NULL);
- (decoder_->*visitor_fn_)(instr);
+ decoder_->VisitNamedInstruction(instr, instruction_name_);
} else {
// Otherwise, using the sampled bit extractor for this node, look up the
// next node in the decode tree, and call its Decode method.
@@ -478,41 +511,53 @@ void CompiledDecodeNode::Decode(const Instruction* instr) const {
}
DecodeNode::MaskValuePair DecodeNode::GenerateMaskValuePair(
- std::string pattern) const {
+ uint32_t pattern) const {
uint32_t mask = 0, value = 0;
- for (size_t i = 0; i < pattern.size(); i++) {
- mask |= ((pattern[i] == 'x') ? 0 : 1) << i;
- value |= ((pattern[i] == '1') ? 1 : 0) << i;
+ for (size_t i = 0; i < GetPatternLength(pattern); i++) {
+ PatternSymbol sym = GetSymbolAt(pattern, i);
+ mask = (mask << 1) | ((sym == PatternSymbol::kSymbolX) ? 0 : 1);
+ value = (value << 1) | (static_cast<uint32_t>(sym) & 1);
}
return std::make_pair(mask, value);
}
-std::string DecodeNode::GenerateOrderedPattern(std::string pattern) const {
- std::vector<uint8_t> sampled_bits = GetSampledBits();
- // Construct a temporary 32-character string containing '_', then at each
- // sampled bit position, set the corresponding pattern character.
- std::string temp(32, '_');
+uint32_t DecodeNode::GenerateOrderedPattern(uint32_t pattern) const {
+ const std::vector<uint8_t>& sampled_bits = GetSampledBits();
+ uint64_t temp = 0xffffffffffffffff;
+
+ // Place symbols into the field of set bits. Symbols are two bits wide and
+ // take values 0, 1 or 2, so 3 will represent "no symbol".
for (size_t i = 0; i < sampled_bits.size(); i++) {
- temp[sampled_bits[i]] = pattern[i];
+ int shift = sampled_bits[i] * 2;
+ temp ^= static_cast<uint64_t>(kEndOfPattern) << shift;
+ temp |= static_cast<uint64_t>(GetSymbolAt(pattern, i)) << shift;
}
- // Iterate through the temporary string, filtering out the non-'_' characters
- // into a new ordered pattern result string.
- std::string result;
- for (size_t i = 0; i < temp.size(); i++) {
- if (temp[i] != '_') {
- result.push_back(temp[i]);
+ // Iterate over temp and extract new pattern ordered by sample position.
+ uint32_t result = kEndOfPattern; // End of pattern marker.
+
+ // Iterate over the pattern one symbol (two bits) at a time.
+ for (int i = 62; i >= 0; i -= 2) {
+ uint32_t sym = (temp >> i) & kPatternSymbolMask;
+
+ // If this is a valid symbol, shift into the result.
+ if (sym != kEndOfPattern) {
+ result = (result << 2) | sym;
}
}
- VIXL_ASSERT(result.size() == sampled_bits.size());
+
+ // The length of the ordered pattern must be the same as the input pattern,
+ // and the number of sampled bits.
+ VIXL_ASSERT(GetPatternLength(result) == GetPatternLength(pattern));
+ VIXL_ASSERT(GetPatternLength(result) == sampled_bits.size());
+
return result;
}
uint32_t DecodeNode::GenerateSampledBitsMask() const {
- std::vector<uint8_t> sampled_bits = GetSampledBits();
uint32_t mask = 0;
- for (size_t i = 0; i < sampled_bits.size(); i++) {
- mask |= 1 << sampled_bits[i];
+ for (int bit : GetSampledBits()) {
+ mask |= 1 << bit;
}
return mask;
}
diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h
index 38540195..22c66e82 100644
--- a/src/aarch64/decoder-aarch64.h
+++ b/src/aarch64/decoder-aarch64.h
@@ -35,9 +35,7 @@
#include "instructions-aarch64.h"
-
// List macro containing all visitors needed by the decoder class.
-
#define VISITOR_LIST_THAT_RETURN(V) \
V(AddSubExtended) \
V(AddSubImmediate) \
@@ -231,8 +229,8 @@
V(SVEIntMulImm_Unpredicated) \
V(SVEIntMulVectors_Predicated) \
V(SVELoadAndBroadcastElement) \
- V(SVELoadAndBroadcastQuadword_ScalarPlusImm) \
- V(SVELoadAndBroadcastQuadword_ScalarPlusScalar) \
+ V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
+ V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
V(SVELoadMultipleStructures_ScalarPlusImm) \
V(SVELoadMultipleStructures_ScalarPlusScalar) \
V(SVELoadPredicateRegister) \
@@ -259,7 +257,7 @@
V(SVETableLookup) \
V(SVEUnpackPredicateElements) \
V(SVEUnpackVectorElements) \
- V(SVEVectorSplice_Destructive) \
+ V(SVEVectorSplice) \
V(System) \
V(TestBranch) \
V(Unallocated) \
@@ -276,14 +274,12 @@
namespace vixl {
namespace aarch64 {
-// The Visitor interface. Disassembler and simulator (and other tools)
-// must provide implementations for all of these functions.
-//
-// Note that this class must change in breaking ways with even minor additions
-// to VIXL, and so its API should be considered unstable. User classes that
-// inherit from this one should be expected to break even on minor version
-// updates. If this is a problem, consider using DecoderVisitorWithDefaults
-// instead.
+using Metadata = std::map<std::string, std::string>;
+
+// The Visitor interface consists only of the Visit() method. User classes
+// that inherit from this one must provide an implementation of the method.
+// Information about the instruction encountered by the Decoder is available
+// via the metadata pointer.
class DecoderVisitor {
public:
enum VisitorConstness { kConstVisitor, kNonConstVisitor };
@@ -292,9 +288,7 @@ class DecoderVisitor {
virtual ~DecoderVisitor() {}
-#define DECLARE(A) virtual void Visit##A(const Instruction* instr) = 0;
- VISITOR_LIST(DECLARE)
-#undef DECLARE
+ virtual void Visit(Metadata* metadata, const Instruction* instr) = 0;
bool IsConstVisitor() const { return constness_ == kConstVisitor; }
Instruction* MutableInstruction(const Instruction* instr) {
@@ -306,25 +300,6 @@ class DecoderVisitor {
const VisitorConstness constness_;
};
-// As above, but a default (no-op) implementation for each visitor is provided.
-// This is useful for derived class that only care about specific visitors.
-//
-// A minor version update may add a visitor, but will never remove one, so it is
-// safe (and recommended) to use `override` in derived classes.
-class DecoderVisitorWithDefaults : public DecoderVisitor {
- public:
- explicit DecoderVisitorWithDefaults(
- VisitorConstness constness = kConstVisitor)
- : DecoderVisitor(constness) {}
-
- virtual ~DecoderVisitorWithDefaults() {}
-
-#define DECLARE(A) \
- virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE { USE(instr); }
- VISITOR_LIST(DECLARE)
-#undef DECLARE
-};
-
class DecodeNode;
class CompiledDecodeNode;
@@ -389,9 +364,7 @@ class Decoder {
// of visitors stored by the decoder.
void RemoveVisitor(DecoderVisitor* visitor);
-#define DECLARE(A) void Visit##A(const Instruction* instr);
- VISITOR_LIST(DECLARE)
-#undef DECLARE
+ void VisitNamedInstruction(const Instruction* instr, const std::string& name);
std::list<DecoderVisitor*>* visitors() { return &visitors_; }
@@ -421,8 +394,6 @@ class Decoder {
std::map<std::string, DecodeNode> decode_nodes_;
};
-const int kMaxDecodeSampledBits = 16;
-const int kMaxDecodeMappings = 100;
typedef void (Decoder::*DecodeFnPtr)(const Instruction*);
typedef uint32_t (Instruction::*BitExtractFn)(void) const;
@@ -436,10 +407,14 @@ struct VisitorNode {
// compilation stage. After compilation, the decoder is embodied in the graph
// of CompiledDecodeNodes pointer to by compiled_decoder_root_.
-// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits as a
-// string to the name of its handler.
+// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits encoded
+// as uint32_t to its handler.
+// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10.
+// 0b11 marks the edge of the most-significant bits of the pattern, which is
+// required to determine the length. For example, the pattern "1x01"_b is
+// encoded in a uint32_t as 0b11_01_10_00_01.
struct DecodePattern {
- const char* pattern;
+ uint32_t pattern;
const char* handler;
};
@@ -448,8 +423,8 @@ struct DecodePattern {
// sampled bits match to the corresponding name of a node.
struct DecodeMapping {
const char* name;
- const uint8_t sampled_bits[kMaxDecodeSampledBits];
- const DecodePattern mapping[kMaxDecodeMappings];
+ const std::vector<uint8_t> sampled_bits;
+ const std::vector<DecodePattern> mapping;
};
// For speed, before nodes can be used for decoding instructions, they must
@@ -463,7 +438,7 @@ class CompiledDecodeNode {
// function that extracts the bits to be sampled.
CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size)
: bit_extract_fn_(bit_extract_fn),
- visitor_fn_(NULL),
+ instruction_name_("node"),
decode_table_size_(decode_table_size),
decoder_(NULL) {
decode_table_ = new CompiledDecodeNode*[decode_table_size_];
@@ -472,9 +447,9 @@ class CompiledDecodeNode {
// Constructor for wrappers around visitor functions. These require no
// decoding, so no bit extraction function or decode table is assigned.
- explicit CompiledDecodeNode(DecodeFnPtr visitor_fn, Decoder* decoder)
+ explicit CompiledDecodeNode(std::string iname, Decoder* decoder)
: bit_extract_fn_(NULL),
- visitor_fn_(visitor_fn),
+ instruction_name_(iname),
decode_table_(NULL),
decode_table_size_(0),
decoder_(decoder) {}
@@ -494,9 +469,9 @@ class CompiledDecodeNode {
// A leaf node is a wrapper for a visitor function.
bool IsLeafNode() const {
- VIXL_ASSERT(((visitor_fn_ == NULL) && (bit_extract_fn_ != NULL)) ||
- ((visitor_fn_ != NULL) && (bit_extract_fn_ == NULL)));
- return visitor_fn_ != NULL;
+ VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) ||
+ ((instruction_name_ != "node") && (bit_extract_fn_ == NULL)));
+ return instruction_name_ != "node";
}
// Get a pointer to the next node required in the decode process, based on the
@@ -521,7 +496,7 @@ class CompiledDecodeNode {
// Visitor function that handles the instruction identified. Set only for
// leaf nodes, where no extra decoding is required, otherwise NULL.
- const DecodeFnPtr visitor_fn_;
+ std::string instruction_name_;
// Mapping table from instruction bits to next decode stage.
CompiledDecodeNode** decode_table_;
@@ -535,30 +510,35 @@ class CompiledDecodeNode {
class DecodeNode {
public:
// Default constructor needed for map initialisation.
- DecodeNode() : compiled_node_(NULL) {}
+ DecodeNode()
+ : sampled_bits_(DecodeNode::kEmptySampledBits),
+ pattern_table_(DecodeNode::kEmptyPatternTable),
+ compiled_node_(NULL) {}
// Constructor for DecodeNode wrappers around visitor functions. These are
// marked as "compiled", as there is no decoding left to do.
- explicit DecodeNode(const VisitorNode& visitor, Decoder* decoder)
- : name_(visitor.name),
- visitor_fn_(visitor.visitor_fn),
+ explicit DecodeNode(const std::string& iname, Decoder* decoder)
+ : name_(iname),
+ sampled_bits_(DecodeNode::kEmptySampledBits),
+ instruction_name_(iname),
+ pattern_table_(DecodeNode::kEmptyPatternTable),
decoder_(decoder),
compiled_node_(NULL) {}
// Constructor for DecodeNodes that map bit patterns to other DecodeNodes.
explicit DecodeNode(const DecodeMapping& map, Decoder* decoder = NULL)
: name_(map.name),
- visitor_fn_(NULL),
+ sampled_bits_(map.sampled_bits),
+ instruction_name_("node"),
+ pattern_table_(map.mapping),
decoder_(decoder),
compiled_node_(NULL) {
- // The length of the bit string in the first mapping determines the number
- // of sampled bits. When adding patterns later, we assert that all mappings
- // sample the same number of bits.
- VIXL_CHECK(strcmp(map.mapping[0].pattern, "otherwise") != 0);
- int bit_count = static_cast<int>(strlen(map.mapping[0].pattern));
- VIXL_CHECK((bit_count > 0) && (bit_count <= 32));
- SetSampledBits(map.sampled_bits, bit_count);
- AddPatterns(map.mapping);
+ // With the current two bits per symbol encoding scheme, the maximum pattern
+ // length is (32 - 2) / 2 = 15 bits.
+ VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15);
+ for (const DecodePattern& p : map.mapping) {
+ VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size());
+ }
}
~DecodeNode() {
@@ -568,21 +548,15 @@ class DecodeNode {
}
}
- // Set the bits sampled from the instruction by this node.
- void SetSampledBits(const uint8_t* bits, int bit_count);
-
// Get the bits sampled from the instruction by this node.
- std::vector<uint8_t> GetSampledBits() const;
+ const std::vector<uint8_t>& GetSampledBits() const { return sampled_bits_; }
// Get the number of bits sampled from the instruction by this node.
- size_t GetSampledBitsCount() const;
-
- // Add patterns to this node's internal pattern table.
- void AddPatterns(const DecodePattern* patterns);
+ size_t GetSampledBitsCount() const { return sampled_bits_.size(); }
// A leaf node is a DecodeNode that wraps the visitor function for the
// identified instruction class.
- bool IsLeafNode() const { return visitor_fn_ != NULL; }
+ bool IsLeafNode() const { return instruction_name_ != "node"; }
std::string GetName() const { return name_; }
@@ -597,7 +571,7 @@ class DecodeNode {
// Create a CompiledDecodeNode wrapping a visitor function. No decoding is
// required for this node; the visitor function is called instead.
void CreateVisitorNode() {
- compiled_node_ = new CompiledDecodeNode(visitor_fn_, decoder_);
+ compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_);
}
// Find and compile the DecodeNode named "name", and set it as the node for
@@ -609,12 +583,16 @@ class DecodeNode {
// contiguous sequence, suitable for indexing an array.
// For example, a mask of 0b1010 returns a function that, given an instruction
// 0bXYZW, will return 0bXZ.
- BitExtractFn GetBitExtractFunction(uint32_t mask);
+ BitExtractFn GetBitExtractFunction(uint32_t mask) {
+ return GetBitExtractFunctionHelper(mask, 0);
+ }
// Get a pointer to an Instruction method that applies a mask to the
// instruction bits, and tests if the result is equal to value. The returned
// function gives a 1 result if (inst & mask == value), 0 otherwise.
- BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value);
+ BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) {
+ return GetBitExtractFunctionHelper(value, mask);
+ }
// Compile this DecodeNode into a new CompiledDecodeNode and returns a pointer
// to it. This pointer is also stored inside the DecodeNode itself. Destroying
@@ -626,22 +604,53 @@ class DecodeNode {
CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; }
bool IsCompiled() const { return GetCompiledNode() != NULL; }
+ enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 };
+ static const uint32_t kEndOfPattern = 3;
+ static const uint32_t kPatternSymbolMask = 3;
+
+ size_t GetPatternLength(uint32_t pattern) const {
+ uint32_t hsb = HighestSetBitPosition(pattern);
+ // The pattern length is signified by two set bits in a two bit-aligned
+ // position. Ensure that the pattern has a highest set bit, it's at an odd
+ // bit position, and that the bit to the right of the hsb is also set.
+ VIXL_ASSERT(((hsb % 2) == 1) && (pattern >> (hsb - 1)) == kEndOfPattern);
+ return hsb / 2;
+ }
+
+ bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const {
+ while ((pattern & kPatternSymbolMask) != kEndOfPattern) {
+ if (static_cast<PatternSymbol>(pattern & kPatternSymbolMask) == symbol)
+ return true;
+ pattern >>= 2;
+ }
+ return false;
+ }
+
+ PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const {
+ size_t len = GetPatternLength(pattern);
+ VIXL_ASSERT((pos < 15) && (pos < len));
+ uint32_t shift = static_cast<uint32_t>(2 * (len - pos - 1));
+ uint32_t sym = (pattern >> shift) & kPatternSymbolMask;
+ return static_cast<PatternSymbol>(sym);
+ }
+
private:
- // Generate a mask and value pair from a string constructed from 0, 1 and x
- // (don't care) characters.
- // For example "10x1" should return mask = 0b1101, value = 0b1001.
+ // Generate a mask and value pair from a pattern constructed from 0, 1 and x
+ // (don't care) 2-bit symbols.
+ // For example "10x1"_b should return mask = 0b1101, value = 0b1001.
typedef std::pair<Instr, Instr> MaskValuePair;
- MaskValuePair GenerateMaskValuePair(std::string pattern) const;
-
- // Generate a pattern string ordered by the bit positions sampled by this
- // node. The first character in the string corresponds to the lowest sampled
- // bit.
- // For example, a pattern of "1x0" expected when sampling bits 31, 1 and 30
- // returns the pattern "x01"; bit 1 should be 'x', bit 30 '0' and bit 31 '1'.
+ MaskValuePair GenerateMaskValuePair(uint32_t pattern) const;
+
+ // Generate a pattern ordered by the bit positions sampled by this node.
+ // The symbol corresponding to the lowest sample position is placed in the
+ // least-significant bits of the result pattern.
+ // For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30
+ // returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31
+ // '1'.
// This output makes comparisons easier between the pattern and bits sampled
// from an instruction using the fast "compress" algorithm. See
// Instruction::Compress().
- std::string GenerateOrderedPattern(std::string pattern) const;
+ uint32_t GenerateOrderedPattern(uint32_t pattern) const;
// Generate a mask with a bit set at each sample position.
uint32_t GenerateSampledBitsMask() const;
@@ -650,20 +659,26 @@ class DecodeNode {
// true if successful.
bool TryCompileOptimisedDecodeTable(Decoder* decoder);
+ // Helper function that returns a bit extracting function. If y is zero,
+ // x is a bit extraction mask. Otherwise, y is the mask, and x is the value
+ // to match after masking.
+ BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y);
+
// Name of this decoder node, used to construct edges in the decode graph.
std::string name_;
// Vector of bits sampled from an instruction to determine which node to look
// up next in the decode process.
- std::vector<uint8_t> sampled_bits_;
+ const std::vector<uint8_t>& sampled_bits_;
+ static const std::vector<uint8_t> kEmptySampledBits;
- // Visitor function that handles the instruction identified. Set only for leaf
- // nodes, where no extra decoding is required. For non-leaf decoding nodes,
- // this pointer is NULL.
- DecodeFnPtr visitor_fn_;
+ // For leaf nodes, this is the name of the instruction form that the node
+ // represents. For other nodes, this is always set to "node".
+ std::string instruction_name_;
// Source mapping from bit pattern to name of next decode stage.
- std::vector<DecodePattern> pattern_table_;
+ const std::vector<DecodePattern>& pattern_table_;
+ static const std::vector<DecodePattern> kEmptyPatternTable;
// Pointer to the decoder containing this node, used to call its visitor
// function for leaf nodes.
diff --git a/src/aarch64/decoder-constants-aarch64.h b/src/aarch64/decoder-constants-aarch64.h
index 53f283bb..ddfdff6e 100644
--- a/src/aarch64/decoder-constants-aarch64.h
+++ b/src/aarch64/decoder-constants-aarch64.h
@@ -27,2112 +27,8816 @@
namespace vixl {
namespace aarch64 {
+// Recursively construct a uint32_t encoded bit pattern from a string literal.
+// The string characters are mapped as two-bit symbols '0'=>0, '1'=>1, 'x'=>2.
+// The remaining symbol, 3, is used to mark the end of the pattern, allowing
+// its length to be found. For example, the pattern "1x01"_b is encoded in a
+// uint32_t as 0b11_01_00_01. The maximum pattern string length is 15
+// characters, encoded as 3 in the most significant bits, followed by 15 2-bit
+// symbols.
+constexpr uint32_t str_to_two_bit_pattern(const char* x, size_t s, uint32_t a) {
+ if (s == 0) return a;
+ uint32_t r = (x[0] == 'x') ? 2 : (x[0] - '0');
+ return str_to_two_bit_pattern(x + 1, s - 1, (a << 2) | r);
+}
+
+constexpr uint32_t operator"" _b(const char* x, size_t s) {
+ return str_to_two_bit_pattern(x, s, DecodeNode::kEndOfPattern);
+}
+
// This decode table is derived from the AArch64 ISA XML specification,
// available from https://developer.arm.com/products/architecture/a-profile/
-//
-// The data below are based on the "Index by Encoding" tables, reformatted into
-// structures of C++ strings, suitable for processing into an instruction
-// decoding tree.
// clang-format off
static const DecodeMapping kDecodeMapping[] = {
- { "Root",
- {28, 27, 26, 25},
- { {"0000", "DecodeReserved"},
- {"0010", "DecodeSVE"},
- {"100x", "DecodeDataProcessingImmediate"},
- {"101x", "DecodeBranchesExceptionAndSystem"},
- {"x1x0", "DecodeLoadsAndStores"},
- {"x101", "DecodeDataProcessingRegister"},
- {"x111", "DecodeDataProcessingFPAndNEON"},
- },
- },
-
- { "DecodeReserved",
- {31, 30, 29, 24, 23, 22, 21, 20, 19, 18, 17, 16},
- { {"000000000000", "VisitReserved"},
- {"otherwise", "VisitUnallocated"},
- },
- },
-
- { "DecodeDataProcessingImmediate",
- {25, 24, 23},
- { {"00x", "VisitPCRelAddressing"},
- {"01x", "UnallocAddSubImmediate"},
- {"100", "UnallocLogicalImmediate"},
- {"101", "UnallocMoveWideImmediate"},
- {"110", "UnallocBitfield"},
- {"111", "UnallocExtract"},
- },
- },
-
- { "DecodeBranchesExceptionAndSystem",
- {31, 30, 29, 25, 24, 23, 22},
- { {"0100xxx", "UnallocConditionalBranch"},
- {"11000xx", "UnallocException"},
- {"1100100", "UnallocSystem"},
- {"1101xxx", "UnallocUnconditionalBranchToRegister"},
- {"x00xxxx", "VisitUnconditionalBranch"},
- {"x010xxx", "VisitCompareBranch"},
- {"x011xxx", "VisitTestBranch"},
- },
- },
-
- { "DecodeLoadsAndStores",
- {31, 29, 28, 26, 24, 23, 21},
- { {"x0000xx", "UnallocLoadStoreExclusive"},
- {"x01x0xx", "UnallocLoadLiteral"},
- {"x0101x0", "UnallocLoadStoreRCpcUnscaledOffset"},
- {"x10x00x", "UnallocLoadStorePairNonTemporal"},
- {"x10x01x", "UnallocLoadStorePairPostIndex"},
- {"x10x10x", "UnallocLoadStorePairOffset"},
- {"x10x11x", "UnallocLoadStorePairPreIndex"},
- {"0001000", "DecodeNEONLoadStoreMulti"},
- {"0001010", "UnallocNEONLoadStoreMultiStructPostIndex"},
- {"000110x", "DecodeNEONLoadStoreSingle"},
- {"000111x", "UnallocNEONLoadStoreSingleStructPostIndex"},
- {"x11x0x0", "DecodeLoadStore"},
- {"x11x0x1", "DecodeLoadStoreRegister"},
- {"x11x1xx", "UnallocLoadStoreUnsignedOffset"},
- },
- },
-
- { "DecodeDataProcessingRegister",
- {30, 28, 24, 23, 22, 21},
- { {"010110", "UnallocDataProcessing2Source"},
- {"110110", "UnallocDataProcessing1Source"},
- {"x00xxx", "UnallocLogicalShifted"},
- {"x01xx0", "UnallocAddSubShifted"},
- {"x01xx1", "UnallocAddSubExtended"},
- {"x10000", "UnallocAddSubWithCarry"},
- {"x10010", "DecodeCondCmp"},
- {"x10100", "UnallocConditionalSelect"},
- {"x11xxx", "UnallocDataProcessing3Source"},
- },
- },
-
- { "DecodeDataProcessingFPAndNEON",
- {31, 30, 29, 28, 24, 21},
- { {"0xx000", "DecodeNEONOther"},
- {"0xx001", "DecodeNEON3Op"},
- {"0xx01x", "DecodeNEONImmAndIndex"},
- {"01x100", "DecodeNEONScalarAnd3SHA"},
- {"01x101", "DecodeNEONScalarAnd2SHA"},
- {"01x11x", "DecodeNEONScalar"},
- {"x0x100", "UnallocFPFixedPointConvert"},
- {"x0x101", "DecodeFP"},
- {"x0x11x", "UnallocFPDataProcessing3Source"},
- },
- },
-
- { "DecodeSVE",
- {31, 30, 29, 24, 21, 15, 14, 13},
- { {"00000x1x", "VisitSVEIntMulAddPredicated"},
- {"00000000", "DecodeSVE00000000"},
- {"00000001", "DecodeSVE00000001"},
- {"00000100", "DecodeSVE00000100"},
- {"00000101", "VisitSVEIntUnaryArithmeticPredicated"},
- {"00001000", "VisitSVEIntArithmeticUnpredicated"},
- {"00001001", "VisitSVEBitwiseLogicalUnpredicated"},
- {"00001010", "DecodeSVE00001010"},
- {"00001100", "VisitSVEBitwiseShiftUnpredicated"},
- {"00001101", "DecodeSVE00001101"},
- {"00001110", "DecodeSVE00001110"},
- {"00001111", "DecodeSVE00001111"},
- {"000100xx", "DecodeSVE000100xx"},
- {"0001010x", "DecodeSVE0001010x"},
- {"00010110", "DecodeSVE00010110"},
- {"00010111", "DecodeSVE00010111"},
- {"00011000", "VisitSVEPermuteVectorExtract"},
- {"00011001", "DecodeSVE00011001"},
- {"00011010", "DecodeSVE00011010"},
- {"00011011", "VisitSVEPermuteVectorInterleaving"},
- {"00011100", "DecodeSVE00011100"},
- {"00011101", "DecodeSVE00011101"},
- {"0001111x", "VisitSVEVectorSelect"},
- {"00100xxx", "VisitSVEIntCompareVectors"},
- {"00101xxx", "VisitSVEIntCompareUnsignedImm"},
- {"00110x0x", "VisitSVEIntCompareSignedImm"},
- {"0011001x", "DecodeSVE0011001x"},
- {"00110110", "DecodeSVE00110110"},
- {"00110111", "DecodeSVE00110111"},
- {"00111000", "VisitSVEIntCompareScalarCountAndLimit"},
- {"00111001", "UnallocSVEConditionallyTerminateScalars"},
- {"00111100", "DecodeSVE00111100"},
- {"00111101", "UnallocSVEPredicateCount"},
- {"0011111x", "DecodeSVE0011111x"},
- {"010000xx", "VisitSVEIntMulAddUnpredicated"},
- {"01001xxx", "VisitSVEMulIndex"},
- {"011000xx", "VisitSVEFPComplexMulAdd"},
- {"01100100", "UnallocSVEFPComplexAddition"},
- {"01101000", "DecodeSVE01101000"},
- {"01101001", "UnallocSVEFPMulIndex"},
- {"01110x1x", "VisitSVEFPCompareVectors"},
- {"01110000", "VisitSVEFPArithmeticUnpredicated"},
- {"01110001", "DecodeSVE01110001"},
- {"01110100", "DecodeSVE01110100"},
- {"01110101", "DecodeSVE01110101"},
- {"01111xxx", "VisitSVEFPMulAdd"},
- {"100x010x", "UnallocSVELoadAndBroadcastElement"},
- {"100x0110", "DecodeSVE100x0110"},
- {"100x0111", "DecodeSVE100x0111"},
- {"100x11xx", "DecodeSVE100x11xx"},
- {"100000xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
- {"100010xx", "DecodeSVE100010xx"},
- {"100100x1", "DecodeSVE100100x1"},
- {"10010000", "DecodeSVE10010000"},
- {"10010010", "DecodeSVE10010010"},
- {"100110x1", "DecodeSVE100110x1"},
- {"10011000", "DecodeSVE10011000"},
- {"10011010", "DecodeSVE10011010"},
- {"101xx000", "VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar"},
- {"101xx001", "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm"},
- {"101xx010", "VisitSVEContiguousLoad_ScalarPlusScalar"},
- {"101xx011", "VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar"},
- {"101xx101", "DecodeSVE101xx101"},
- {"101x0110", "DecodeSVE101x0110"},
- {"101x0111", "DecodeSVE101x0111"},
- {"101x1110", "VisitSVELoadMultipleStructures_ScalarPlusScalar"},
- {"101x1111", "DecodeSVE101x1111"},
- {"110x00xx", "VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets"},
- {"110x0111", "DecodeSVE110x0111"},
- {"1100010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
- {"11000110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
- {"110010xx", "DecodeSVE110010xx"},
- {"110011xx", "DecodeSVE110011xx"},
- {"1101010x", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
- {"11010110", "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
- {"110110xx", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"},
- {"110111xx", "DecodeSVE110111xx"},
- {"111x0011", "DecodeSVE111x0011"},
- {"111x01x0", "DecodeSVE111x01x0"},
- {"111x0101", "DecodeSVE111x0101"},
- {"111x0111", "DecodeSVE111x0111"},
- {"111x1011", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"},
- {"111x11x0", "DecodeSVE111x11x0"},
- {"111x1101", "DecodeSVE111x1101"},
- {"111x1111", "DecodeSVE111x1111"},
- {"1110x010", "VisitSVEContiguousStore_ScalarPlusScalar"},
- {"1111x000", "UnallocSVEStorePredicateRegister"},
- {"1111x010", "DecodeSVE1111x010"},
- },
- },
-
- { "DecodeSVE00000000",
- {20, 19, 18},
- { {"00x", "VisitSVEIntAddSubtractVectors_Predicated"},
- {"01x", "VisitSVEIntMinMaxDifference_Predicated"},
- {"100", "VisitSVEIntMulVectors_Predicated"},
- {"101", "VisitSVEIntDivideVectors_Predicated"},
- {"11x", "VisitSVEBitwiseLogical_Predicated"},
+ { "_gggyqx",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "fcvtnu_asimdmiscfp16_r"},
+ {"0x00001"_b, "fcvtnu_asimdmisc_r"},
+ {"1111001"_b, "fcvtpu_asimdmiscfp16_r"},
+ {"1x00001"_b, "fcvtpu_asimdmisc_r"},
+ {"xx10000"_b, "umaxv_asimdall_only"},
+ {"xx10001"_b, "uminv_asimdall_only"},
+ },
+ },
+
+ { "_ggvztl",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_qpzynz"},
+ },
+ },
+
+ { "_ghmzhr",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"0000000"_b, "rbit_32_dp_1src"},
+ {"0000001"_b, "clz_32_dp_1src"},
+ },
+ },
+
+ { "_ghnljt",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0000000"_b, "fcvtns_64s_float2int"},
+ {"0000001"_b, "fcvtnu_64s_float2int"},
+ {"0000010"_b, "scvtf_s64_float2int"},
+ {"0000011"_b, "ucvtf_s64_float2int"},
+ {"0000100"_b, "fcvtas_64s_float2int"},
+ {"0000101"_b, "fcvtau_64s_float2int"},
+ {"0001000"_b, "fcvtps_64s_float2int"},
+ {"0001001"_b, "fcvtpu_64s_float2int"},
+ {"0010000"_b, "fcvtms_64s_float2int"},
+ {"0010001"_b, "fcvtmu_64s_float2int"},
+ {"0011000"_b, "fcvtzs_64s_float2int"},
+ {"0011001"_b, "fcvtzu_64s_float2int"},
+ {"0100000"_b, "fcvtns_64d_float2int"},
+ {"0100001"_b, "fcvtnu_64d_float2int"},
+ {"0100010"_b, "scvtf_d64_float2int"},
+ {"0100011"_b, "ucvtf_d64_float2int"},
+ {"0100100"_b, "fcvtas_64d_float2int"},
+ {"0100101"_b, "fcvtau_64d_float2int"},
+ {"0100110"_b, "fmov_64d_float2int"},
+ {"0100111"_b, "fmov_d64_float2int"},
+ {"0101000"_b, "fcvtps_64d_float2int"},
+ {"0101001"_b, "fcvtpu_64d_float2int"},
+ {"0110000"_b, "fcvtms_64d_float2int"},
+ {"0110001"_b, "fcvtmu_64d_float2int"},
+ {"0111000"_b, "fcvtzs_64d_float2int"},
+ {"0111001"_b, "fcvtzu_64d_float2int"},
+ {"1001110"_b, "fmov_64vx_float2int"},
+ {"1001111"_b, "fmov_v64i_float2int"},
+ {"1100000"_b, "fcvtns_64h_float2int"},
+ {"1100001"_b, "fcvtnu_64h_float2int"},
+ {"1100010"_b, "scvtf_h64_float2int"},
+ {"1100011"_b, "ucvtf_h64_float2int"},
+ {"1100100"_b, "fcvtas_64h_float2int"},
+ {"1100101"_b, "fcvtau_64h_float2int"},
+ {"1100110"_b, "fmov_64h_float2int"},
+ {"1100111"_b, "fmov_h64_float2int"},
+ {"1101000"_b, "fcvtps_64h_float2int"},
+ {"1101001"_b, "fcvtpu_64h_float2int"},
+ {"1110000"_b, "fcvtms_64h_float2int"},
+ {"1110001"_b, "fcvtmu_64h_float2int"},
+ {"1111000"_b, "fcvtzs_64h_float2int"},
+ {"1111001"_b, "fcvtzu_64h_float2int"},
+ },
+ },
+
+ { "_gjprmg",
+ {11},
+ { {"0"_b, "_llpsqq"},
},
},
- { "DecodeSVE00000100",
- {20, 19},
- { {"0x", "VisitSVEBitwiseShiftByImm_Predicated"},
- {"10", "VisitSVEBitwiseShiftByVector_Predicated"},
- {"11", "VisitSVEBitwiseShiftByWideElements_Predicated"},
+ { "_gjsnly",
+ {16, 13, 12},
+ { {"000"_b, "rev16_64_dp_1src"},
+ {"001"_b, "cls_64_dp_1src"},
+ {"100"_b, "pacib_64p_dp_1src"},
+ {"101"_b, "autib_64p_dp_1src"},
+ {"110"_b, "_ksvxxm"},
+ {"111"_b, "_xsgxyy"},
+ },
+ },
+
+ { "_gjylrt",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "fcvtns_32h_float2int"},
+ {"00001"_b, "fcvtnu_32h_float2int"},
+ {"00010"_b, "scvtf_h32_float2int"},
+ {"00011"_b, "ucvtf_h32_float2int"},
+ {"00100"_b, "fcvtas_32h_float2int"},
+ {"00101"_b, "fcvtau_32h_float2int"},
+ {"00110"_b, "fmov_32h_float2int"},
+ {"00111"_b, "fmov_h32_float2int"},
+ {"01000"_b, "fcvtps_32h_float2int"},
+ {"01001"_b, "fcvtpu_32h_float2int"},
+ {"10000"_b, "fcvtms_32h_float2int"},
+ {"10001"_b, "fcvtmu_32h_float2int"},
+ {"11000"_b, "fcvtzs_32h_float2int"},
+ {"11001"_b, "fcvtzu_32h_float2int"},
+ },
+ },
+
+ { "_gkhhjm",
+ {30, 23, 22},
+ { {"000"_b, "sbfm_32m_bitfield"},
+ {"100"_b, "ubfm_32m_bitfield"},
+ },
+ },
+
+ { "_gkkpjz",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "fcvtmu_asisdmiscfp16_r"},
+ {"0x00001"_b, "fcvtmu_asisdmisc_r"},
+ {"1111001"_b, "fcvtzu_asisdmiscfp16_r"},
+ {"1x00001"_b, "fcvtzu_asisdmisc_r"},
+ {"xx00000"_b, "neg_asisdmisc_r"},
+ },
+ },
+
+ { "_gkpvxz",
+ {10},
+ { {"0"_b, "blraa_64p_branch_reg"},
+ {"1"_b, "blrab_64p_branch_reg"},
+ },
+ },
+
+ { "_gkpzhr",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"000xxxx"_b, "fnmsub_s_floatdp3"},
+ {"001xxxx"_b, "fnmsub_d_floatdp3"},
+ {"011xxxx"_b, "fnmsub_h_floatdp3"},
+ {"10001x0"_b, "fmul_asisdelem_rh_h"},
+ {"10x0101"_b, "sqshrn_asisdshf_n"},
+ {"10x0111"_b, "sqrshrn_asisdshf_n"},
+ {"11x01x0"_b, "fmul_asisdelem_r_sd"},
+ {"1xx11x0"_b, "sqdmull_asisdelem_l"},
+ },
+ },
+
+ { "_gkxgsn",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "stlur_32_ldapstl_unscaled"},
+ {"00100"_b, "ldapur_32_ldapstl_unscaled"},
+ {"01000"_b, "ldapursw_64_ldapstl_unscaled"},
+ {"10000"_b, "stlur_64_ldapstl_unscaled"},
+ {"10100"_b, "ldapur_64_ldapstl_unscaled"},
+ },
+ },
+
+ { "_glgrjy",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0000000"_b, "not_asimdmisc_r"},
+ {"0100000"_b, "rbit_asimdmisc_r"},
+ },
+ },
+
+ { "_glhxyj",
+ {17},
+ { {"0"_b, "ld3_asisdlsop_bx3_r3b"},
+ {"1"_b, "ld3_asisdlsop_b3_i3b"},
+ },
+ },
+
+ { "_glkzlv",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "rev16_asimdmisc_r"},
+ },
+ },
+
+ { "_gmjhll",
+ {17},
+ { {"0"_b, "st1_asisdlsep_r4_r4"},
+ {"1"_b, "st1_asisdlsep_i4_i4"},
+ },
+ },
+
+ { "_gmrxlp",
+ {30},
+ { {"0"_b, "orr_32_log_shift"},
+ {"1"_b, "ands_32_log_shift"},
},
},
- { "DecodeSVE00001010",
- {23, 12, 11},
- { {"x0x", "VisitSVEIndexGeneration"},
- {"010", "VisitSVEStackFrameAdjustment"},
- {"110", "UnallocSVEStackFrameSize"},
+ { "_gmrxqq",
+ {30, 23, 22},
+ { {"000"_b, "stp_q_ldstpair_off"},
+ {"001"_b, "ldp_q_ldstpair_off"},
+ {"010"_b, "stp_q_ldstpair_pre"},
+ {"011"_b, "ldp_q_ldstpair_pre"},
},
},
- { "UnallocSVEStackFrameSize",
+ { "_gmsgqz",
+ {30, 23, 22},
+ { {"100"_b, "eor3_vvv16_crypto4"},
+ {"101"_b, "sm3ss1_vvv4_crypto4"},
+ {"110"_b, "xar_vvv2_crypto3_imm6"},
+ },
+ },
+
+ { "_gmvjgn",
+ {23},
+ { {"0"_b, "fmax_asimdsame_only"},
+ {"1"_b, "fmin_asimdsame_only"},
+ },
+ },
+
+ { "_gmvrxn",
+ {18, 17, 12},
+ { {"000"_b, "st4_asisdlso_d4_4d"},
+ },
+ },
+
+ { "_gmvtss",
+ {30},
+ { {"0"_b, "ldr_q_loadlit"},
+ },
+ },
+
+ { "_gngjxr",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "cadd_z_zz"},
+ {"00001"_b, "sqcadd_z_zz"},
+ },
+ },
+
+ { "_gnqhsl",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0010000"_b, "punpklo_p_p"},
+ {"0010001"_b, "punpkhi_p_p"},
+ {"xx0xxxx"_b, "zip1_p_pp"},
+ {"xx10100"_b, "rev_p_p"},
+ },
+ },
+
+ { "_gnqjhz",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"0000000"_b, "rev16_32_dp_1src"},
+ {"0000001"_b, "cls_32_dp_1src"},
+ },
+ },
+
+ { "_gntpyh",
+ {23, 13, 12, 11, 10},
+ { {"00010"_b, "_gqspys"},
+ {"00110"_b, "_ymgrgx"},
+ {"01001"_b, "fcmge_asisdsame_only"},
+ {"01011"_b, "facge_asisdsame_only"},
+ {"01110"_b, "_kjyphv"},
+ {"10010"_b, "_myjqrl"},
+ {"10101"_b, "fabd_asisdsame_only"},
+ {"10110"_b, "_vlsmsn"},
+ {"11001"_b, "fcmgt_asisdsame_only"},
+ {"11011"_b, "facgt_asisdsame_only"},
+ {"11110"_b, "_pxtsvn"},
+ },
+ },
+
+ { "_gnxgxs",
+ {30, 18},
+ { {"00"_b, "_krlpjl"},
+ },
+ },
+
+ { "_gnytkh",
+ {1, 0},
+ { {"11"_b, "braaz_64_branch_reg"},
+ },
+ },
+
+ { "_gpxltv",
+ {23, 18, 17, 16},
+ { {"0000"_b, "uqxtnt_z_zz"},
+ },
+ },
+
+ { "_gqspys",
{22, 20, 19, 18, 17, 16},
- { {"011111", "VisitSVEStackFrameSize"},
+ { {"111001"_b, "fcvtau_asisdmiscfp16_r"},
+ {"x00001"_b, "fcvtau_asisdmisc_r"},
+ {"x10000"_b, "fmaxnmp_asisdpair_only_sd"},
},
},
- { "DecodeSVE00001101",
- {12, 11, 10},
- { {"0xx", "VisitSVEAddressGeneration"},
- {"10x", "VisitSVEFPTrigSelectCoefficient"},
- {"110", "VisitSVEFPExponentialAccelerator"},
- {"111", "VisitSVEConstructivePrefix_Unpredicated"},
+ { "_gqykqv",
+ {23, 22, 12},
+ { {"000"_b, "_rjmyyl"},
+ {"001"_b, "_zqltpy"},
+ {"010"_b, "_hstvrp"},
+ {"011"_b, "_yhqyzj"},
+ {"110"_b, "_mxtskk"},
+ {"111"_b, "_qmjqhq"},
},
},
- { "DecodeSVE00001110",
- {20, 12, 11},
- { {"00x", "VisitSVESaturatingIncDecVectorByElementCount"},
- {"100", "VisitSVEIncDecVectorByElementCount"},
+ { "_grqnlm",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"000xxxx"_b, "fnmadd_s_floatdp3"},
+ {"001xxxx"_b, "fnmadd_d_floatdp3"},
+ {"011xxxx"_b, "fnmadd_h_floatdp3"},
+ {"10001x0"_b, "fmla_asisdelem_rh_h"},
+ {"10x0001"_b, "sshr_asisdshf_r"},
+ {"10x0101"_b, "ssra_asisdshf_r"},
+ {"10x1001"_b, "srshr_asisdshf_r"},
+ {"10x1101"_b, "srsra_asisdshf_r"},
+ {"11x01x0"_b, "fmla_asisdelem_r_sd"},
+ {"1xx11x0"_b, "sqdmlal_asisdelem_l"},
},
},
- { "DecodeSVE00001111",
- {20, 12, 11},
- { {"x1x", "VisitSVESaturatingIncDecRegisterByElementCount"},
- {"000", "VisitSVEElementCount"},
- {"100", "VisitSVEIncDecRegisterByElementCount"},
+ { "_grrjlh",
+ {30},
+ { {"1"_b, "_jlqxvj"},
},
},
- { "DecodeSVE000100xx",
- {23, 22, 20, 19, 18},
- { {"xx1xx", "VisitSVECopyIntImm_Predicated"},
- {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"11000", "VisitSVEBroadcastBitmaskImm"},
+ { "_grxzzg",
+ {23, 22},
+ { {"00"_b, "tbx_asimdtbl_l2_2"},
},
},
- { "DecodeSVE0001010x",
- {23, 22, 20, 19, 18},
- { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"11000", "VisitSVEBroadcastBitmaskImm"},
+ { "_gsgzpg",
+ {17},
+ { {"0"_b, "ld2_asisdlso_h2_2h"},
},
},
- { "DecodeSVE00010110",
- {23, 22, 20, 19, 18},
- { {"xx1xx", "VisitSVECopyFPImm_Predicated"},
- {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"11000", "VisitSVEBroadcastBitmaskImm"},
+ { "_gshrzq",
+ {22, 20, 11},
+ { {"010"_b, "decb_r_rs"},
+ {"110"_b, "dech_r_rs"},
},
},
- { "DecodeSVE00010111",
- {23, 22, 20, 19, 18},
- { {"0x000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"10000", "VisitSVEBitwiseLogicalWithImm_Unpredicated"},
- {"11000", "VisitSVEBroadcastBitmaskImm"},
+ { "_gskkxk",
+ {17},
+ { {"0"_b, "st1_asisdlso_h1_1h"},
+ },
+ },
+
+ { "_gsttpm",
+ {12},
+ { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+ },
+ },
+
+ { "_gszlvl",
+ {30},
+ { {"0"_b, "_tvsszp"},
+ {"1"_b, "_njtngm"},
+ },
+ },
+
+ { "_gszxkp",
+ {13, 12},
+ { {"11"_b, "cmgt_asisdsame_only"},
+ },
+ },
+
+ { "_gtjskz",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1011011"_b, "bfmmla_asimdsame2_e"},
+ {"x011111"_b, "bfdot_asimdsame2_d"},
+ {"x111111"_b, "bfmlal_asimdsame2_f"},
+ {"xxx0xx1"_b, "fcmla_asimdsame2_c"},
+ {"xxx1x01"_b, "fcadd_asimdsame2_c"},
+ },
+ },
+
+ { "_gttglx",
+ {17},
+ { {"0"_b, "st4_asisdlso_h4_4h"},
+ },
+ },
+
+ { "_gtvhmp",
+ {30, 13},
+ { {"00"_b, "_rjyrnt"},
+ {"01"_b, "_mzhsrq"},
+ {"10"_b, "_xtzlzy"},
+ {"11"_b, "_kqxhzx"},
+ },
+ },
+
+ { "_gtxpgx",
+ {30, 23, 13, 4},
+ { {"0000"_b, "prfw_i_p_bz_s_x32_scaled"},
+ {"0010"_b, "prfd_i_p_bz_s_x32_scaled"},
+ {"010x"_b, "ld1h_z_p_bz_s_x32_scaled"},
+ {"011x"_b, "ldff1h_z_p_bz_s_x32_scaled"},
+ {"1000"_b, "prfw_i_p_bz_d_x32_scaled"},
+ {"1010"_b, "prfd_i_p_bz_d_x32_scaled"},
+ {"110x"_b, "ld1h_z_p_bz_d_x32_scaled"},
+ {"111x"_b, "ldff1h_z_p_bz_d_x32_scaled"},
+ },
+ },
+
+ { "_gvjgyp",
+ {23, 22, 13, 12, 11, 10},
+ { {"0001x0"_b, "fmls_asimdelem_rh_h"},
+ {"0x0101"_b, "shl_asimdshf_r"},
+ {"0x1101"_b, "sqshl_asimdshf_r"},
+ {"1000x0"_b, "fmlsl_asimdelem_lh"},
+ {"1x01x0"_b, "fmls_asimdelem_r_sd"},
+ {"xx10x0"_b, "smlsl_asimdelem_l"},
+ {"xx11x0"_b, "sqdmlsl_asimdelem_l"},
+ },
+ },
+
+ { "_gvstrp",
+ {17},
+ { {"0"_b, "ld2_asisdlsop_bx2_r2b"},
+ {"1"_b, "ld2_asisdlsop_b2_i2b"},
+ },
+ },
+
+ { "_gvykrp",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"10001x0"_b, "fmulx_asisdelem_rh_h"},
+ {"10x0001"_b, "sqshrun_asisdshf_n"},
+ {"10x0011"_b, "sqrshrun_asisdshf_n"},
+ {"10x0101"_b, "uqshrn_asisdshf_n"},
+ {"10x0111"_b, "uqrshrn_asisdshf_n"},
+ {"11x01x0"_b, "fmulx_asisdelem_r_sd"},
+ },
+ },
+
+ { "_gxlvsg",
+ {13},
+ { {"0"_b, "_vpxvjs"},
+ {"1"_b, "_lpslrz"},
+ },
+ },
+
+ { "_gxmnkl",
+ {23, 22},
+ { {"10"_b, "cdot_z_zzzi_s"},
+ {"11"_b, "cdot_z_zzzi_d"},
+ },
+ },
+
+ { "_gxnlxg",
+ {20, 19, 18, 17, 16},
+ { {"00001"_b, "uqxtn_asisdmisc_n"},
+ },
+ },
+
+ { "_gxslgq",
+ {23, 22, 20, 19, 17, 16},
+ { {"000010"_b, "scvtf_s32_float2fix"},
+ {"000011"_b, "ucvtf_s32_float2fix"},
+ {"001100"_b, "fcvtzs_32s_float2fix"},
+ {"001101"_b, "fcvtzu_32s_float2fix"},
+ {"010010"_b, "scvtf_d32_float2fix"},
+ {"010011"_b, "ucvtf_d32_float2fix"},
+ {"011100"_b, "fcvtzs_32d_float2fix"},
+ {"011101"_b, "fcvtzu_32d_float2fix"},
+ {"110010"_b, "scvtf_h32_float2fix"},
+ {"110011"_b, "ucvtf_h32_float2fix"},
+ {"111100"_b, "fcvtzs_32h_float2fix"},
+ {"111101"_b, "fcvtzu_32h_float2fix"},
+ },
+ },
+
+ { "_gygnsz",
+ {17},
+ { {"0"_b, "ld2_asisdlsop_hx2_r2h"},
+ {"1"_b, "ld2_asisdlsop_h2_i2h"},
+ },
+ },
+
+ { "_gymljg",
+ {23},
+ { {"0"_b, "fmulx_asimdsame_only"},
+ },
+ },
+
+ { "_gyrjrm",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "cpy_z_p_v"},
+ {"00001"_b, "compact_z_p_z"},
+ {"00010"_b, "lasta_v_p_z"},
+ {"00011"_b, "lastb_v_p_z"},
+ {"00100"_b, "revb_z_z"},
+ {"00101"_b, "revh_z_z"},
+ {"00110"_b, "revw_z_z"},
+ {"00111"_b, "rbit_z_p_z"},
+ {"01000"_b, "clasta_z_p_zz"},
+ {"01001"_b, "clastb_z_p_zz"},
+ {"01010"_b, "clasta_v_p_z"},
+ {"01011"_b, "clastb_v_p_z"},
+ {"01100"_b, "splice_z_p_zz_des"},
+ {"01101"_b, "splice_z_p_zz_con"},
+ },
+ },
+
+ { "_gznnvh",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "frinta_asimdmiscfp16_r"},
+ {"0x00001"_b, "frinta_asimdmisc_r"},
+ {"xx00000"_b, "cmge_asimdmisc_z"},
+ },
+ },
+
+ { "_gzqvnk",
+ {23, 12, 4, 3, 2, 1, 0},
+ { {"1000000"_b, "ctermeq_rr"},
+ {"1010000"_b, "ctermne_rr"},
+ {"x10xxxx"_b, "whilewr_p_rr"},
+ {"x11xxxx"_b, "whilerw_p_rr"},
+ },
+ },
+
+ { "_gzvgmh",
+ {18, 17, 12},
+ { {"0x0"_b, "ld4_asisdlsop_dx4_r4d"},
+ {"100"_b, "ld4_asisdlsop_dx4_r4d"},
+ {"110"_b, "ld4_asisdlsop_d4_i4d"},
+ },
+ },
+
+ { "_gzylzp",
+ {17},
+ { {"0"_b, "st3_asisdlsop_hx3_r3h"},
+ {"1"_b, "st3_asisdlsop_h3_i3h"},
+ },
+ },
+
+ { "_hggmnk",
+ {13, 12},
+ { {"10"_b, "lslv_32_dp_2src"},
+ },
+ },
+
+ { "_hgxqpp",
+ {18, 17},
+ { {"00"_b, "st3_asisdlso_s3_3s"},
+ },
+ },
+
+ { "_hgxtqy",
+ {30, 23, 22, 13},
+ { {"0001"_b, "ldnt1w_z_p_ar_s_x32_unscaled"},
+ {"0010"_b, "ld1rsh_z_p_bi_s64"},
+ {"0011"_b, "ld1rsh_z_p_bi_s32"},
+ {"0110"_b, "ld1rsb_z_p_bi_s64"},
+ {"0111"_b, "ld1rsb_z_p_bi_s32"},
+ {"1000"_b, "ldnt1sw_z_p_ar_d_64_unscaled"},
+ {"1010"_b, "ld1sw_z_p_bz_d_64_unscaled"},
+ {"1011"_b, "ldff1sw_z_p_bz_d_64_unscaled"},
+ },
+ },
+
+ { "_hhhqjk",
+ {4, 3, 2, 1, 0},
+ { {"11111"_b, "_pqpzkt"},
+ },
+ },
+
+ { "_hhkhkk",
+ {30, 23, 11, 10},
+ { {"1001"_b, "_lkvynm"},
+ },
+ },
+
+ { "_hhkqtn",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "lasta_r_p_z"},
+ {"00001"_b, "lastb_r_p_z"},
+ {"01000"_b, "cpy_z_p_r"},
+ {"10000"_b, "clasta_r_p_z"},
+ {"10001"_b, "clastb_r_p_z"},
+ },
+ },
+
+ { "_hhnjjk",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "pacdzb_64z_dp_1src"},
+ },
+ },
+
+ { "_hhymvj",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"0000011"_b, "sqabs_asisdmisc_r"},
+ {"0000100"_b, "sqxtn_asisdmisc_n"},
+ },
+ },
+
+ { "_hjgylh",
+ {30, 23, 22},
+ { {"000"_b, "str_s_ldst_pos"},
+ {"001"_b, "ldr_s_ldst_pos"},
+ {"100"_b, "str_d_ldst_pos"},
+ {"101"_b, "ldr_d_ldst_pos"},
+ },
+ },
+
+ { "_hjqtrt",
+ {12},
+ { {"0"_b, "st1_asisdlsop_dx1_r1d"},
+ },
+ },
+
+ { "_hjtvvm",
+ {13, 12},
+ { {"00"_b, "sdiv_64_dp_2src"},
+ {"10"_b, "rorv_64_dp_2src"},
+ },
+ },
+
+ { "_hljrqn",
+ {22},
+ { {"0"_b, "str_32_ldst_regoff"},
+ {"1"_b, "ldr_32_ldst_regoff"},
+ },
+ },
+
+ { "_hlshjk",
+ {23, 22},
+ { {"00"_b, "fmlal_asimdsame_f"},
+ {"10"_b, "fmlsl_asimdsame_f"},
+ },
+ },
+
+ { "_hmsgpj",
+ {13, 12, 10},
+ { {"000"_b, "_hthxvr"},
+ {"100"_b, "ptrue_p_s"},
+ {"101"_b, "_kkvrzq"},
+ {"110"_b, "_xxjrsy"},
+ },
+ },
+
+ { "_hmtmlq",
+ {4},
+ { {"0"_b, "nor_p_p_pp_z"},
+ {"1"_b, "nand_p_p_pp_z"},
+ },
+ },
+
+ { "_hmtxlh",
+ {9, 8, 7, 6, 5, 1, 0},
+ { {"1111111"_b, "retaa_64e_branch_reg"},
+ },
+ },
+
+ { "_hmxlny",
+ {13, 12, 11, 10},
+ { {"0000"_b, "addhn_asimddiff_n"},
+ {"0001"_b, "sshl_asimdsame_only"},
+ {"0010"_b, "_lyghyg"},
+ {"0011"_b, "sqshl_asimdsame_only"},
+ {"0100"_b, "sabal_asimddiff_l"},
+ {"0101"_b, "srshl_asimdsame_only"},
+ {"0110"_b, "_htgzzx"},
+ {"0111"_b, "sqrshl_asimdsame_only"},
+ {"1000"_b, "subhn_asimddiff_n"},
+ {"1001"_b, "smax_asimdsame_only"},
+ {"1010"_b, "_sqpjtr"},
+ {"1011"_b, "smin_asimdsame_only"},
+ {"1100"_b, "sabdl_asimddiff_l"},
+ {"1101"_b, "sabd_asimdsame_only"},
+ {"1110"_b, "_rnrzsj"},
+ {"1111"_b, "saba_asimdsame_only"},
+ },
+ },
+
+ { "_hngpgx",
+ {23, 10, 4},
+ { {"000"_b, "_vxsjgg"},
+ },
+ },
+
+ { "_hngpxg",
+ {1, 0},
+ { {"00"_b, "br_64_branch_reg"},
+ },
+ },
+
+ { "_hnjrmp",
+ {4},
+ { {"0"_b, "cmplo_p_p_zi"},
+ {"1"_b, "cmpls_p_p_zi"},
+ },
+ },
+
+ { "_hnzzkj",
+ {30, 18},
+ { {"00"_b, "_gxslgq"},
+ },
+ },
+
+ { "_hpgqlp",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_s_floatimm"},
+ },
+ },
+
+ { "_hqhzgj",
+ {17},
+ { {"0"_b, "ld2_asisdlso_b2_2b"},
+ },
+ },
+
+ { "_hqlskj",
+ {18, 17},
+ { {"00"_b, "ld1_asisdlse_r1_1v"},
+ },
+ },
+
+ { "_hqnxvt",
+ {13, 12, 11, 10},
+ { {"0000"_b, "saddl_asimddiff_l"},
+ {"0001"_b, "shadd_asimdsame_only"},
+ {"0010"_b, "_rykykh"},
+ {"0011"_b, "sqadd_asimdsame_only"},
+ {"0100"_b, "saddw_asimddiff_w"},
+ {"0101"_b, "srhadd_asimdsame_only"},
+ {"0110"_b, "_glkzlv"},
+ {"0111"_b, "_rnktts"},
+ {"1000"_b, "ssubl_asimddiff_l"},
+ {"1001"_b, "shsub_asimdsame_only"},
+ {"1010"_b, "_rgztzl"},
+ {"1011"_b, "sqsub_asimdsame_only"},
+ {"1100"_b, "ssubw_asimddiff_w"},
+ {"1101"_b, "cmgt_asimdsame_only"},
+ {"1110"_b, "_nyxxks"},
+ {"1111"_b, "cmge_asimdsame_only"},
+ },
+ },
+
+ { "_hqsvmh",
+ {18, 17},
+ { {"00"_b, "st4_asisdlso_s4_4s"},
+ },
+ },
+
+ { "_hrhzqy",
+ {17},
+ { {"0"_b, "ld4_asisdlse_r4"},
+ },
+ },
+
+ { "_hrktgs",
+ {12},
+ { {"0"_b, "st2_asisdlsop_dx2_r2d"},
+ },
+ },
+
+ { "_hrllsn",
+ {18, 17, 16},
+ { {"000"_b, "fadd_z_p_zz"},
+ {"001"_b, "fsub_z_p_zz"},
+ {"010"_b, "fmul_z_p_zz"},
+ {"011"_b, "fsubr_z_p_zz"},
+ {"100"_b, "fmaxnm_z_p_zz"},
+ {"101"_b, "fminnm_z_p_zz"},
+ {"110"_b, "fmax_z_p_zz"},
+ {"111"_b, "fmin_z_p_zz"},
+ },
+ },
+
+ { "_hrxyts",
+ {23, 22, 20, 19, 18, 13},
+ { {"00000x"_b, "orr_z_zi"},
+ {"01000x"_b, "eor_z_zi"},
+ {"10000x"_b, "and_z_zi"},
+ {"11000x"_b, "dupm_z_i"},
+ {"xx1xx0"_b, "fcpy_z_p_i"},
+ },
+ },
+
+ { "_hsjynv",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ },
+ },
+
+ { "_hstvrp",
+ {20, 19, 18, 17, 16, 13},
+ { {"000000"_b, "fmov_d_floatdp1"},
+ {"000010"_b, "fneg_d_floatdp1"},
+ {"000100"_b, "fcvt_sd_floatdp1"},
+ {"000110"_b, "bfcvt_bs_floatdp1"},
+ {"001000"_b, "frintn_d_floatdp1"},
+ {"001010"_b, "frintm_d_floatdp1"},
+ {"001100"_b, "frinta_d_floatdp1"},
+ {"001110"_b, "frintx_d_floatdp1"},
+ {"010000"_b, "frint32z_d_floatdp1"},
+ {"010010"_b, "frint64z_d_floatdp1"},
+ },
+ },
+
+ { "_hsvgnt",
+ {23, 22, 4, 3, 2, 1, 0},
+ { {"0000001"_b, "svc_ex_exception"},
+ {"0000010"_b, "hvc_ex_exception"},
+ {"0000011"_b, "smc_ex_exception"},
+ {"0100000"_b, "hlt_ex_exception"},
+ },
+ },
+
+ { "_htgzzx",
+ {20, 18, 17, 16},
+ { {"0000"_b, "_mqgtsq"},
+ },
+ },
+
+ { "_hthxvr",
+ {23, 22, 9},
+ { {"010"_b, "pfirst_p_p_p"},
+ },
+ },
+
+ { "_htmthz",
+ {22, 20, 19, 18, 17, 16, 13, 12},
+ { {"01111100"_b, "_msztzv"},
+ },
+ },
+
+ { "_htnmls",
+ {22, 13, 12},
+ { {"000"_b, "ldapr_32l_memop"},
+ },
+ },
+
+ { "_htplsj",
+ {4},
+ { {"0"_b, "cmpeq_p_p_zz"},
+ {"1"_b, "cmpne_p_p_zz"},
+ },
+ },
+
+ { "_htppjj",
+ {30, 23, 22},
+ { {"000"_b, "msub_64a_dp_3src"},
+ },
+ },
+
+ { "_htqpks",
+ {30, 20, 19, 18, 17, 16, 13},
+ { {"000000x"_b, "add_z_zi"},
+ {"000001x"_b, "sub_z_zi"},
+ {"000011x"_b, "subr_z_zi"},
+ {"000100x"_b, "sqadd_z_zi"},
+ {"000101x"_b, "uqadd_z_zi"},
+ {"000110x"_b, "sqsub_z_zi"},
+ {"000111x"_b, "uqsub_z_zi"},
+ {"0010000"_b, "smax_z_zi"},
+ {"0010010"_b, "umax_z_zi"},
+ {"0010100"_b, "smin_z_zi"},
+ {"0010110"_b, "umin_z_zi"},
+ {"0100000"_b, "mul_z_zi"},
+ {"011000x"_b, "dup_z_i"},
+ {"0110010"_b, "fdup_z_i"},
+ {"1xxxxx0"_b, "fnmad_z_p_zzz"},
+ {"1xxxxx1"_b, "fnmsb_z_p_zzz"},
+ },
+ },
+
+ { "_hvvyhl",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0x00001"_b, "frint32z_asimdmisc_r"},
+ {"1111000"_b, "fcmlt_asimdmiscfp16_fz"},
+ {"1x00000"_b, "fcmlt_asimdmisc_fz"},
+ },
+ },
+
+ { "_hvyjnk",
+ {11},
+ { {"0"_b, "sqrdmulh_z_zzi_h"},
+ },
+ },
+
+ { "_hxglyp",
+ {17},
+ { {"0"_b, "ld4_asisdlsep_r4_r"},
+ {"1"_b, "ld4_asisdlsep_i4_i"},
+ },
+ },
+
+ { "_hxmjhn",
+ {30, 23, 22, 19, 16},
+ { {"10010"_b, "aese_b_cryptoaes"},
+ {"xxx00"_b, "cls_asimdmisc_r"},
+ {"xxx01"_b, "sqxtn_asimdmisc_n"},
+ },
+ },
+
+ { "_hxnmsl",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld2w_z_p_bi_contiguous"},
+ {"000x0"_b, "ld2w_z_p_br_contiguous"},
+ {"00101"_b, "ld4w_z_p_bi_contiguous"},
+ {"001x0"_b, "ld4w_z_p_br_contiguous"},
+ {"01001"_b, "ld2d_z_p_bi_contiguous"},
+ {"010x0"_b, "ld2d_z_p_br_contiguous"},
+ {"01101"_b, "ld4d_z_p_bi_contiguous"},
+ {"011x0"_b, "ld4d_z_p_br_contiguous"},
+ {"10011"_b, "st2w_z_p_bi_contiguous"},
+ {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"},
+ {"10111"_b, "st4w_z_p_bi_contiguous"},
+ {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"},
+ {"10x01"_b, "st1w_z_p_bi"},
+ {"11011"_b, "st2d_z_p_bi_contiguous"},
+ {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"},
+ {"11111"_b, "st4d_z_p_bi_contiguous"},
+ {"11x01"_b, "st1d_z_p_bi"},
+ },
+ },
+
+ { "_hxrtsq",
+ {23, 22, 12},
+ { {"000"_b, "_gxlvsg"},
+ {"001"_b, "_kxhjtk"},
+ {"010"_b, "_hyxhpl"},
+ {"011"_b, "_kvgjzh"},
+ {"110"_b, "_tpsylx"},
+ {"111"_b, "_zhpxqz"},
+ },
+ },
+
+ { "_hxzlmm",
+ {30, 23, 22},
+ { {"000"_b, "stxp_sp32_ldstexcl"},
+ {"001"_b, "ldxp_lp32_ldstexcl"},
+ {"100"_b, "stxp_sp64_ldstexcl"},
+ {"101"_b, "ldxp_lp64_ldstexcl"},
+ },
+ },
+
+ { "_hykhmt",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "saddv_r_p_z"},
+ {"00001"_b, "uaddv_r_p_z"},
+ {"01000"_b, "smaxv_r_p_z"},
+ {"01001"_b, "umaxv_r_p_z"},
+ {"01010"_b, "sminv_r_p_z"},
+ {"01011"_b, "uminv_r_p_z"},
+ {"1000x"_b, "movprfx_z_p_z"},
+ {"11000"_b, "orv_r_p_z"},
+ {"11001"_b, "eorv_r_p_z"},
+ {"11010"_b, "andv_r_p_z"},
+ },
+ },
+
+ { "_hyxhpl",
+ {13},
+ { {"0"_b, "_yrrppk"},
+ {"1"_b, "_pnxggm"},
+ },
+ },
+
+ { "_hyymjs",
+ {18, 17, 12},
+ { {"0x0"_b, "ld2_asisdlsop_dx2_r2d"},
+ {"100"_b, "ld2_asisdlsop_dx2_r2d"},
+ {"110"_b, "ld2_asisdlsop_d2_i2d"},
+ },
+ },
+
+ { "_hzkglv",
+ {30, 23, 22, 13},
+ { {"0000"_b, "ld1b_z_p_br_u8"},
+ {"0001"_b, "ldff1b_z_p_br_u8"},
+ {"0010"_b, "ld1b_z_p_br_u32"},
+ {"0011"_b, "ldff1b_z_p_br_u32"},
+ {"0100"_b, "ld1sw_z_p_br_s64"},
+ {"0101"_b, "ldff1sw_z_p_br_s64"},
+ {"0110"_b, "ld1h_z_p_br_u32"},
+ {"0111"_b, "ldff1h_z_p_br_u32"},
+ {"1001"_b, "stnt1b_z_p_br_contiguous"},
+ {"1011"_b, "st3b_z_p_br_contiguous"},
+ {"10x0"_b, "st1b_z_p_br"},
+ {"1101"_b, "stnt1h_z_p_br_contiguous"},
+ {"1111"_b, "st3h_z_p_br_contiguous"},
+ {"11x0"_b, "st1h_z_p_br"},
+ },
+ },
+
+ { "_hzllgl",
+ {17},
+ { {"0"_b, "st1_asisdlse_r4_4v"},
+ },
+ },
+
+ { "_hzmlps",
+ {19},
+ { {"0"_b, "_rpqgjl"},
+ {"1"_b, "sys_cr_systeminstrs"},
+ },
+ },
+
+ { "_hzxjsp",
+ {23, 22, 20, 19, 16, 13, 10},
+ { {"0000000"_b, "_shgkvq"},
+ {"0000001"_b, "_vytxll"},
+ {"0000010"_b, "_hqsvmh"},
+ {"0000011"_b, "_gmvrxn"},
+ {"0100000"_b, "_ygyxvx"},
+ {"0100001"_b, "_tszvvk"},
+ {"0100010"_b, "_tyjqvt"},
+ {"0100011"_b, "_ylqnqt"},
+ {"100xx00"_b, "st2_asisdlsop_sx2_r2s"},
+ {"100xx01"_b, "_hrktgs"},
+ {"100xx10"_b, "st4_asisdlsop_sx4_r4s"},
+ {"100xx11"_b, "_mmrtvz"},
+ {"1010x00"_b, "st2_asisdlsop_sx2_r2s"},
+ {"1010x01"_b, "_lmtnzv"},
+ {"1010x10"_b, "st4_asisdlsop_sx4_r4s"},
+ {"1010x11"_b, "_qrykhm"},
+ {"1011000"_b, "st2_asisdlsop_sx2_r2s"},
+ {"1011001"_b, "_nyssqn"},
+ {"1011010"_b, "st4_asisdlsop_sx4_r4s"},
+ {"1011011"_b, "_kpqgsn"},
+ {"1011100"_b, "_knpsmq"},
+ {"1011101"_b, "_jzyzjh"},
+ {"1011110"_b, "_vhhktl"},
+ {"1011111"_b, "_yjxvkp"},
+ {"110xx00"_b, "ld2_asisdlsop_sx2_r2s"},
+ {"110xx01"_b, "_zppjvk"},
+ {"110xx10"_b, "ld4_asisdlsop_sx4_r4s"},
+ {"110xx11"_b, "_kqjmvy"},
+ {"1110x00"_b, "ld2_asisdlsop_sx2_r2s"},
+ {"1110x01"_b, "_ptkrvg"},
+ {"1110x10"_b, "ld4_asisdlsop_sx4_r4s"},
+ {"1110x11"_b, "_kjryvx"},
+ {"1111000"_b, "ld2_asisdlsop_sx2_r2s"},
+ {"1111001"_b, "_mlvpxh"},
+ {"1111010"_b, "ld4_asisdlsop_sx4_r4s"},
+ {"1111011"_b, "_xqjrgk"},
+ {"1111100"_b, "_msgqps"},
+ {"1111101"_b, "_hyymjs"},
+ {"1111110"_b, "_qsnqpz"},
+ {"1111111"_b, "_gzvgmh"},
+ },
+ },
+
+ { "_jggvph",
+ {30},
+ { {"0"_b, "bic_64_log_shift"},
+ {"1"_b, "eon_64_log_shift"},
+ },
+ },
+
+ { "_jgmlpk",
+ {4},
+ { {"0"_b, "match_p_p_zz"},
+ {"1"_b, "nmatch_p_p_zz"},
+ },
+ },
+
+ { "_jgyhrh",
+ {4},
+ { {"0"_b, "cmplo_p_p_zi"},
+ {"1"_b, "cmpls_p_p_zi"},
+ },
+ },
+
+ { "_jhkglp",
+ {30, 23, 22},
+ { {"110"_b, "xar_vvv2_crypto3_imm6"},
+ },
+ },
+
+ { "_jhllmn",
+ {4},
+ { {"0"_b, "cmpge_p_p_zz"},
+ {"1"_b, "cmpgt_p_p_zz"},
+ },
+ },
+
+ { "_jhqlkv",
+ {30, 23, 22},
+ { {"000"_b, "stxr_sr32_ldstexcl"},
+ {"001"_b, "ldxr_lr32_ldstexcl"},
+ {"010"_b, "stllr_sl32_ldstexcl"},
+ {"011"_b, "ldlar_lr32_ldstexcl"},
+ {"100"_b, "stxr_sr64_ldstexcl"},
+ {"101"_b, "ldxr_lr64_ldstexcl"},
+ {"110"_b, "stllr_sl64_ldstexcl"},
+ {"111"_b, "ldlar_lr64_ldstexcl"},
+ },
+ },
+
+ { "_jhytlg",
+ {30, 23, 22, 13, 11, 10},
+ { {"000010"_b, "str_b_ldst_regoff"},
+ {"000110"_b, "str_bl_ldst_regoff"},
+ {"001010"_b, "ldr_b_ldst_regoff"},
+ {"001110"_b, "ldr_bl_ldst_regoff"},
+ {"010x10"_b, "str_q_ldst_regoff"},
+ {"011x10"_b, "ldr_q_ldst_regoff"},
+ {"100x10"_b, "str_h_ldst_regoff"},
+ {"101x10"_b, "ldr_h_ldst_regoff"},
+ },
+ },
+
+ { "_jkkqvy",
+ {22, 20, 11},
+ { {"100"_b, "uqinch_z_zs"},
+ {"101"_b, "uqdech_z_zs"},
+ {"110"_b, "dech_z_zs"},
+ },
+ },
+
+ { "_jkpsxk",
+ {20},
+ { {"0"_b, "_kyygzs"},
+ {"1"_b, "msr_sr_systemmove"},
+ },
+ },
+
+ { "_jkqktg",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "sqneg_asimdmisc_r"},
+ },
+ },
+
+ { "_jkrlsg",
+ {23, 22},
+ { {"00"_b, "fmsub_s_floatdp3"},
+ {"01"_b, "fmsub_d_floatdp3"},
+ {"11"_b, "fmsub_h_floatdp3"},
+ },
+ },
+
+ { "_jksztq",
+ {22, 20, 19, 13, 12},
+ { {"0x100"_b, "sri_asisdshf_r"},
+ {"0x101"_b, "sli_asisdshf_r"},
+ {"0x110"_b, "sqshlu_asisdshf_r"},
+ {"0x111"_b, "uqshl_asisdshf_r"},
+ {"10x00"_b, "sri_asisdshf_r"},
+ {"10x01"_b, "sli_asisdshf_r"},
+ {"10x10"_b, "sqshlu_asisdshf_r"},
+ {"10x11"_b, "uqshl_asisdshf_r"},
+ {"11100"_b, "sri_asisdshf_r"},
+ {"11101"_b, "sli_asisdshf_r"},
+ {"11110"_b, "sqshlu_asisdshf_r"},
+ {"11111"_b, "uqshl_asisdshf_r"},
+ {"x1000"_b, "sri_asisdshf_r"},
+ {"x1001"_b, "sli_asisdshf_r"},
+ {"x1010"_b, "sqshlu_asisdshf_r"},
+ {"x1011"_b, "uqshl_asisdshf_r"},
+ },
+ },
+
+ { "_jkxlnq",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_nhzyvv"},
+ },
+ },
+
+ { "_jlqjzr",
+ {30, 23},
+ { {"00"_b, "adds_64s_addsub_imm"},
+ {"10"_b, "subs_64s_addsub_imm"},
+ },
+ },
+
+ { "_jlqxvj",
+ {23, 22},
+ { {"01"_b, "_mplgqv"},
+ {"10"_b, "xar_vvv2_crypto3_imm6"},
+ {"11"_b, "_ljhtkq"},
+ },
+ },
+
+ { "_jlrrlt",
+ {11, 10, 4},
+ { {"000"_b, "whilege_p_p_rr"},
+ {"001"_b, "whilegt_p_p_rr"},
+ {"010"_b, "whilelt_p_p_rr"},
+ {"011"_b, "whilele_p_p_rr"},
+ {"100"_b, "whilehs_p_p_rr"},
+ {"101"_b, "whilehi_p_p_rr"},
+ {"110"_b, "whilelo_p_p_rr"},
+ {"111"_b, "whilels_p_p_rr"},
+ },
+ },
+
+ { "_jlrvpl",
+ {17},
+ { {"0"_b, "st2_asisdlse_r2"},
+ },
+ },
+
+ { "_jmgkrl",
+ {30},
+ { {"0"_b, "orn_32_log_shift"},
+ {"1"_b, "bics_32_log_shift"},
+ },
+ },
+
+ { "_jmvgsp",
+ {22, 20, 11},
+ { {"100"_b, "sqinch_z_zs"},
+ {"101"_b, "sqdech_z_zs"},
+ {"110"_b, "inch_z_zs"},
+ },
+ },
+
+ { "_jmxstz",
+ {13, 12, 11, 10},
+ { {"0000"_b, "sqdecp_z_p_z"},
+ {"0010"_b, "sqdecp_r_p_r_sx"},
+ {"0011"_b, "sqdecp_r_p_r_x"},
+ },
+ },
+
+ { "_jmyslr",
+ {17},
+ { {"0"_b, "ld1_asisdlsep_r4_r4"},
+ {"1"_b, "ld1_asisdlsep_i4_i4"},
+ },
+ },
+
+ { "_jnjlsh",
+ {12},
+ { {"0"_b, "st1_asisdlsop_dx1_r1d"},
+ },
+ },
+
+ { "_jnmgrh",
+ {30, 19, 18, 17, 16},
+ { {"11000"_b, "ins_asimdins_iv_v"},
+ {"1x100"_b, "ins_asimdins_iv_v"},
+ {"1xx10"_b, "ins_asimdins_iv_v"},
+ {"1xxx1"_b, "ins_asimdins_iv_v"},
+ },
+ },
+
+ { "_jplmmr",
+ {23, 22, 20, 19, 16, 13, 12},
+ { {"0111100"_b, "fcvtas_asisdmiscfp16_r"},
+ {"0111101"_b, "scvtf_asisdmiscfp16_r"},
+ {"0x00100"_b, "fcvtas_asisdmisc_r"},
+ {"0x00101"_b, "scvtf_asisdmisc_r"},
+ {"0x10000"_b, "fmaxnmp_asisdpair_only_h"},
+ {"0x10001"_b, "faddp_asisdpair_only_h"},
+ {"0x10011"_b, "fmaxp_asisdpair_only_h"},
+ {"1111000"_b, "fcmgt_asisdmiscfp16_fz"},
+ {"1111001"_b, "fcmeq_asisdmiscfp16_fz"},
+ {"1111010"_b, "fcmlt_asisdmiscfp16_fz"},
+ {"1111101"_b, "frecpe_asisdmiscfp16_r"},
+ {"1111111"_b, "frecpx_asisdmiscfp16_r"},
+ {"1x00000"_b, "fcmgt_asisdmisc_fz"},
+ {"1x00001"_b, "fcmeq_asisdmisc_fz"},
+ {"1x00010"_b, "fcmlt_asisdmisc_fz"},
+ {"1x00101"_b, "frecpe_asisdmisc_r"},
+ {"1x00111"_b, "frecpx_asisdmisc_r"},
+ {"1x10000"_b, "fminnmp_asisdpair_only_h"},
+ {"1x10011"_b, "fminp_asisdpair_only_h"},
+ },
+ },
+
+ { "_jpvljz",
+ {23, 22},
+ { {"01"_b, "fcmeq_asimdsamefp16_only"},
+ },
+ },
+
+ { "_jpxgqh",
+ {30, 23, 22},
+ { {"000"_b, "sbfm_32m_bitfield"},
+ {"100"_b, "ubfm_32m_bitfield"},
+ },
+ },
+
+ { "_jqjnrv",
+ {18, 17},
+ { {"00"_b, "st1_asisdlso_s1_1s"},
+ },
+ },
+
+ { "_jqnglz",
+ {23, 22, 20, 19, 11},
+ { {"00010"_b, "ucvtf_asisdshf_c"},
+ {"001x0"_b, "ucvtf_asisdshf_c"},
+ {"01xx0"_b, "ucvtf_asisdshf_c"},
+ },
+ },
+
+ { "_jqnhrj",
+ {12, 10},
+ { {"00"_b, "_mzynlp"},
+ {"01"_b, "_mvglql"},
+ {"10"_b, "_tylqpt"},
+ {"11"_b, "_lrjyhr"},
+ },
+ },
+
+ { "_jqplxx",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"1111100"_b, "_xpvpqq"},
+ },
+ },
+
+ { "_jqtltz",
+ {13},
+ { {"0"_b, "mul_asimdelem_r"},
+ {"1"_b, "smull_asimdelem_l"},
+ },
+ },
+
+ { "_jqxqql",
+ {22, 20, 11},
+ { {"000"_b, "uqincw_z_zs"},
+ {"001"_b, "uqdecw_z_zs"},
+ {"010"_b, "decw_z_zs"},
+ {"100"_b, "uqincd_z_zs"},
+ {"101"_b, "uqdecd_z_zs"},
+ {"110"_b, "decd_z_zs"},
+ },
+ },
+
+ { "_jrgzxt",
+ {18, 17},
+ { {"00"_b, "ld3_asisdlse_r3"},
+ },
+ },
+
+ { "_jrlynj",
+ {11, 10},
+ { {"00"_b, "_gzqvnk"},
+ },
+ },
+
+ { "_jrnlzs",
+ {13, 12, 11},
+ { {"000"_b, "fminnmp_asimdsamefp16_only"},
+ {"010"_b, "fabd_asimdsamefp16_only"},
+ {"100"_b, "fcmgt_asimdsamefp16_only"},
+ {"101"_b, "facgt_asimdsamefp16_only"},
+ {"110"_b, "fminp_asimdsamefp16_only"},
+ },
+ },
+
+ { "_jrnxzh",
+ {12},
+ { {"0"_b, "cmla_z_zzz"},
+ {"1"_b, "sqrdcmlah_z_zzz"},
+ },
+ },
+
+ { "_jrsptt",
+ {13, 12},
+ { {"00"_b, "sqadd_asisdsame_only"},
+ {"10"_b, "sqsub_asisdsame_only"},
+ {"11"_b, "cmge_asisdsame_only"},
+ },
+ },
+
+ { "_jryylt",
+ {30, 23, 22, 19, 18, 17, 16},
+ { {"00000x1"_b, "smov_asimdins_w_w"},
+ {"0000x10"_b, "smov_asimdins_w_w"},
+ {"00010xx"_b, "smov_asimdins_w_w"},
+ {"0001110"_b, "smov_asimdins_w_w"},
+ {"000x10x"_b, "smov_asimdins_w_w"},
+ {"000x111"_b, "smov_asimdins_w_w"},
+ {"10000x1"_b, "smov_asimdins_x_x"},
+ {"1000x10"_b, "smov_asimdins_x_x"},
+ {"10010xx"_b, "smov_asimdins_x_x"},
+ {"1001110"_b, "smov_asimdins_x_x"},
+ {"100x10x"_b, "smov_asimdins_x_x"},
+ {"100x111"_b, "smov_asimdins_x_x"},
+ },
+ },
+
+ { "_jsygzs",
+ {30, 23, 22, 12, 11, 10},
+ { {"0000xx"_b, "add_64_addsub_ext"},
+ {"000100"_b, "add_64_addsub_ext"},
+ {"1000xx"_b, "sub_64_addsub_ext"},
+ {"100100"_b, "sub_64_addsub_ext"},
+ },
+ },
+
+ { "_jtqlhs",
+ {22},
+ { {"0"_b, "str_64_ldst_regoff"},
+ {"1"_b, "ldr_64_ldst_regoff"},
+ },
+ },
+
+ { "_jvhnxl",
+ {23},
+ { {"0"_b, "fcmge_asimdsame_only"},
+ {"1"_b, "fcmgt_asimdsame_only"},
+ },
+ },
+
+ { "_jvpqrp",
+ {23, 22},
+ { {"00"_b, "fmla_asisdelem_rh_h"},
+ {"1x"_b, "fmla_asisdelem_r_sd"},
+ },
+ },
+
+ { "_jvvzjq",
+ {23, 22},
+ { {"00"_b, "fcsel_s_floatsel"},
+ {"01"_b, "fcsel_d_floatsel"},
+ {"11"_b, "fcsel_h_floatsel"},
+ },
+ },
+
+ { "_jxrlyh",
+ {12},
+ { {"0"_b, "_mtgksl"},
+ },
+ },
+
+ { "_jxszhy",
+ {23, 22, 11},
+ { {"000"_b, "_rqhryp"},
+ },
+ },
+
+ { "_jxtgtx",
+ {30, 23, 22},
+ { {"000"_b, "str_b_ldst_pos"},
+ {"001"_b, "ldr_b_ldst_pos"},
+ {"010"_b, "str_q_ldst_pos"},
+ {"011"_b, "ldr_q_ldst_pos"},
+ {"100"_b, "str_h_ldst_pos"},
+ {"101"_b, "ldr_h_ldst_pos"},
+ },
+ },
+
+ { "_jxyskn",
+ {13, 12, 11, 10},
+ { {"0000"_b, "uqincp_z_p_z"},
+ {"0010"_b, "uqincp_r_p_r_uw"},
+ {"0011"_b, "uqincp_r_p_r_x"},
+ },
+ },
+
+ { "_jxzrxm",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "usqadd_asisdmisc_r"},
+ },
+ },
+
+ { "_jymnkk",
+ {23, 22, 12, 11, 10},
+ { {"01000"_b, "bfdot_z_zzzi"},
+ {"100x0"_b, "fmlalb_z_zzzi_s"},
+ {"100x1"_b, "fmlalt_z_zzzi_s"},
+ {"110x0"_b, "bfmlalb_z_zzzi"},
+ {"110x1"_b, "bfmlalt_z_zzzi"},
+ },
+ },
+
+ { "_jyxszq",
+ {30, 4},
+ { {"0x"_b, "b_only_branch_imm"},
+ {"10"_b, "b_only_condbranch"},
+ },
+ },
+
+ { "_jzjvtv",
+ {19, 18, 17, 16, 4},
+ { {"00000"_b, "brkbs_p_p_p_z"},
+ },
+ },
+
+ { "_jzkqhn",
+ {23, 22, 12, 11, 10},
+ { {"10000"_b, "fmlslb_z_zzz"},
+ {"10001"_b, "fmlslt_z_zzz"},
+ },
+ },
+
+ { "_jzyzjh",
+ {18, 17, 12},
+ { {"0x0"_b, "st2_asisdlsop_dx2_r2d"},
+ {"100"_b, "st2_asisdlsop_dx2_r2d"},
+ {"110"_b, "st2_asisdlsop_d2_i2d"},
+ },
+ },
+
+ { "_kgmqkh",
+ {30, 23, 22, 13},
+ { {"0000"_b, "ld1w_z_p_ai_s"},
+ {"0001"_b, "ldff1w_z_p_ai_s"},
+ {"0010"_b, "ld1rw_z_p_bi_u32"},
+ {"0011"_b, "ld1rw_z_p_bi_u64"},
+ {"0110"_b, "ld1rsb_z_p_bi_s16"},
+ {"0111"_b, "ld1rd_z_p_bi_u64"},
+ {"1000"_b, "ld1w_z_p_ai_d"},
+ {"1001"_b, "ldff1w_z_p_ai_d"},
+ {"1010"_b, "ld1w_z_p_bz_d_64_scaled"},
+ {"1011"_b, "ldff1w_z_p_bz_d_64_scaled"},
+ {"1100"_b, "ld1d_z_p_ai_d"},
+ {"1101"_b, "ldff1d_z_p_ai_d"},
+ {"1110"_b, "ld1d_z_p_bz_d_64_scaled"},
+ {"1111"_b, "ldff1d_z_p_bz_d_64_scaled"},
+ },
+ },
+
+ { "_kgpgly",
+ {23, 22, 10},
+ { {"100"_b, "smlslb_z_zzzi_s"},
+ {"101"_b, "smlslt_z_zzzi_s"},
+ {"110"_b, "smlslb_z_zzzi_d"},
+ {"111"_b, "smlslt_z_zzzi_d"},
+ },
+ },
+
+ { "_khjvqq",
+ {22, 11},
+ { {"00"_b, "sqrdmulh_z_zzi_s"},
+ {"10"_b, "sqrdmulh_z_zzi_d"},
+ },
+ },
+
+ { "_kjghlk",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "orr_asimdimm_l_sl"},
+ {"00x100"_b, "ssra_asimdshf_r"},
+ {"00x110"_b, "srsra_asimdshf_r"},
+ {"010x00"_b, "ssra_asimdshf_r"},
+ {"010x10"_b, "srsra_asimdshf_r"},
+ {"011100"_b, "ssra_asimdshf_r"},
+ {"011110"_b, "srsra_asimdshf_r"},
+ {"0x1000"_b, "ssra_asimdshf_r"},
+ {"0x1010"_b, "srsra_asimdshf_r"},
+ },
+ },
+
+ { "_kjngjl",
+ {23, 22},
+ { {"00"_b, "tbx_asimdtbl_l1_1"},
+ },
+ },
+
+ { "_kjpxvh",
+ {20, 19, 18},
+ { {"000"_b, "_yyrkmn"},
+ },
+ },
+
+ { "_kjqynn",
+ {4},
+ { {"0"_b, "cmphs_p_p_zi"},
+ {"1"_b, "cmphi_p_p_zi"},
+ },
+ },
+
+ { "_kjrxpx",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "ucvtf_asimdmiscfp16_r"},
+ {"0x00001"_b, "ucvtf_asimdmisc_r"},
+ {"1111000"_b, "fcmle_asimdmiscfp16_fz"},
+ {"1111001"_b, "frsqrte_asimdmiscfp16_r"},
+ {"1x00000"_b, "fcmle_asimdmisc_fz"},
+ {"1x00001"_b, "frsqrte_asimdmisc_r"},
+ },
+ },
+
+ { "_kjryvx",
+ {12},
+ { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
+ },
+ },
+
+ { "_kjyphv",
+ {20, 19, 18, 17, 16},
+ { {"10000"_b, "fmaxp_asisdpair_only_sd"},
+ },
+ },
+
+ { "_kkgpjl",
+ {20, 19, 18, 17},
+ { {"0000"_b, "_msqkyy"},
+ },
+ },
+
+ { "_kkgzst",
+ {23, 22, 13, 12, 11, 10},
+ { {"0001x0"_b, "fmla_asimdelem_rh_h"},
+ {"0x0001"_b, "sshr_asimdshf_r"},
+ {"0x0101"_b, "ssra_asimdshf_r"},
+ {"0x1001"_b, "srshr_asimdshf_r"},
+ {"0x1101"_b, "srsra_asimdshf_r"},
+ {"1000x0"_b, "fmlal_asimdelem_lh"},
+ {"1x01x0"_b, "fmla_asimdelem_r_sd"},
+ {"xx10x0"_b, "smlal_asimdelem_l"},
+ {"xx11x0"_b, "sqdmlal_asimdelem_l"},
+ },
+ },
+
+ { "_kkmjyr",
+ {0},
+ { {"1"_b, "blrabz_64_branch_reg"},
},
},
- { "UnallocSVEBroadcastIndexElement",
+ { "_kkmxxx",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_jqplxx"},
+ },
+ },
+
+ { "_kknjng",
+ {23, 22, 20, 19, 11},
+ { {"00010"_b, "ssra_asisdshf_r"},
+ {"001x0"_b, "ssra_asisdshf_r"},
+ {"01xx0"_b, "ssra_asisdshf_r"},
+ },
+ },
+
+ { "_kktglv",
+ {30, 13, 12},
+ { {"000"_b, "_njvkjq"},
+ {"001"_b, "_rpzykx"},
+ {"010"_b, "_zzvxvh"},
+ {"011"_b, "_yqxnzl"},
+ {"100"_b, "_gxmnkl"},
+ {"110"_b, "_lkxgjy"},
+ {"111"_b, "_vjmklj"},
+ },
+ },
+
+ { "_kkvrzq",
+ {23, 22, 9, 8, 7, 6, 5},
+ { {"0000000"_b, "pfalse_p"},
+ },
+ },
+
+ { "_klkgqk",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "fcvtms_asimdmiscfp16_r"},
+ {"0x00001"_b, "fcvtms_asimdmisc_r"},
+ {"1111001"_b, "fcvtzs_asimdmiscfp16_r"},
+ {"1x00001"_b, "fcvtzs_asimdmisc_r"},
+ {"xx00000"_b, "abs_asimdmisc_r"},
+ {"xx10001"_b, "addv_asimdall_only"},
+ },
+ },
+
+ { "_klnhpj",
+ {9, 8, 7, 6, 5, 1, 0},
+ { {"1111111"_b, "eretab_64e_branch_reg"},
+ },
+ },
+
+ { "_klthpn",
+ {30, 23, 22, 11, 10},
+ { {"01000"_b, "csel_64_condsel"},
+ {"01001"_b, "csinc_64_condsel"},
+ {"11000"_b, "csinv_64_condsel"},
+ {"11001"_b, "csneg_64_condsel"},
+ },
+ },
+
+ { "_kmhtqp",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ },
+ },
+
+ { "_kmkpnj",
+ {17},
+ { {"0"_b, "ld3_asisdlso_h3_3h"},
+ },
+ },
+
+ { "_knkjnz",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld1sh_z_p_bi_s32"},
+ {"00011"_b, "ldnf1sh_z_p_bi_s32"},
+ {"00101"_b, "ld1w_z_p_bi_u64"},
+ {"00111"_b, "ldnf1w_z_p_bi_u64"},
+ {"01001"_b, "ld1sb_z_p_bi_s32"},
+ {"01011"_b, "ldnf1sb_z_p_bi_s32"},
+ {"01101"_b, "ld1d_z_p_bi_u64"},
+ {"01111"_b, "ldnf1d_z_p_bi_u64"},
+ {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"},
+ {"100x1"_b, "st1w_z_p_bz_d_64_scaled"},
+ {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"},
+ {"101x1"_b, "st1w_z_p_ai_s"},
+ {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"},
+ {"110x1"_b, "st1d_z_p_bz_d_64_scaled"},
+ },
+ },
+
+ { "_knpsmq",
+ {18, 17},
+ { {"0x"_b, "st2_asisdlsop_sx2_r2s"},
+ {"10"_b, "st2_asisdlsop_sx2_r2s"},
+ {"11"_b, "st2_asisdlsop_s2_i2s"},
+ },
+ },
+
+ { "_kpmvkn",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "stur_b_ldst_unscaled"},
+ {"00001"_b, "str_b_ldst_immpost"},
+ {"00011"_b, "str_b_ldst_immpre"},
+ {"00100"_b, "ldur_b_ldst_unscaled"},
+ {"00101"_b, "ldr_b_ldst_immpost"},
+ {"00111"_b, "ldr_b_ldst_immpre"},
+ {"01000"_b, "stur_q_ldst_unscaled"},
+ {"01001"_b, "str_q_ldst_immpost"},
+ {"01011"_b, "str_q_ldst_immpre"},
+ {"01100"_b, "ldur_q_ldst_unscaled"},
+ {"01101"_b, "ldr_q_ldst_immpost"},
+ {"01111"_b, "ldr_q_ldst_immpre"},
+ {"10000"_b, "stur_h_ldst_unscaled"},
+ {"10001"_b, "str_h_ldst_immpost"},
+ {"10011"_b, "str_h_ldst_immpre"},
+ {"10100"_b, "ldur_h_ldst_unscaled"},
+ {"10101"_b, "ldr_h_ldst_immpost"},
+ {"10111"_b, "ldr_h_ldst_immpre"},
+ },
+ },
+
+ { "_kpqgsn",
+ {12},
+ { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+ },
+ },
+
+ { "_kpxtsp",
+ {6, 5},
+ { {"00"_b, "cfinv_m_pstate"},
+ {"01"_b, "xaflag_m_pstate"},
+ {"10"_b, "axflag_m_pstate"},
+ },
+ },
+
+ { "_kpyqyv",
+ {12},
+ { {"0"_b, "_vjxqhp"},
+ },
+ },
+
+ { "_kqjmvy",
+ {12},
+ { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
+ },
+ },
+
+ { "_kqkhtz",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "autiza_64z_dp_1src"},
+ },
+ },
+
+ { "_kqvljp",
+ {18, 17, 16},
+ { {"000"_b, "fabd_z_p_zz"},
+ {"001"_b, "fscale_z_p_zz"},
+ {"010"_b, "fmulx_z_p_zz"},
+ {"100"_b, "fdivr_z_p_zz"},
+ {"101"_b, "fdiv_z_p_zz"},
+ },
+ },
+
+ { "_kqxhzx",
+ {20, 19, 18, 16, 12, 11, 10},
+ { {"0000xxx"_b, "_zmzxjm"},
+ {"0010xxx"_b, "_tmshps"},
+ {"0011xxx"_b, "_tsksxr"},
+ {"0110100"_b, "_pnzphx"},
+ {"0111100"_b, "_xpkkpn"},
+ {"1000xxx"_b, "_psqpkp"},
+ {"1001xxx"_b, "_phxkzh"},
+ {"1100xxx"_b, "_vsvrgt"},
+ },
+ },
+
+ { "_kqzmtr",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld1b_z_p_bi_u16"},
+ {"00011"_b, "ldnf1b_z_p_bi_u16"},
+ {"00101"_b, "ld1b_z_p_bi_u64"},
+ {"00111"_b, "ldnf1b_z_p_bi_u64"},
+ {"01001"_b, "ld1h_z_p_bi_u16"},
+ {"01011"_b, "ldnf1h_z_p_bi_u16"},
+ {"01101"_b, "ld1h_z_p_bi_u64"},
+ {"01111"_b, "ldnf1h_z_p_bi_u64"},
+ {"101x1"_b, "st1b_z_p_ai_s"},
+ {"110x0"_b, "st1h_z_p_bz_d_x32_scaled"},
+ {"110x1"_b, "st1h_z_p_bz_d_64_scaled"},
+ {"111x0"_b, "st1h_z_p_bz_s_x32_scaled"},
+ {"111x1"_b, "st1h_z_p_ai_s"},
+ },
+ },
+
+ { "_krhrrr",
+ {12, 10},
+ { {"00"_b, "_xyzpvp"},
+ {"01"_b, "_nlyntn"},
+ {"10"_b, "_zhkjzg"},
+ {"11"_b, "_zmpzkg"},
+ },
+ },
+
+ { "_krlpjl",
+ {23, 22, 20, 19, 17, 16},
+ { {"000010"_b, "scvtf_s64_float2fix"},
+ {"000011"_b, "ucvtf_s64_float2fix"},
+ {"001100"_b, "fcvtzs_64s_float2fix"},
+ {"001101"_b, "fcvtzu_64s_float2fix"},
+ {"010010"_b, "scvtf_d64_float2fix"},
+ {"010011"_b, "ucvtf_d64_float2fix"},
+ {"011100"_b, "fcvtzs_64d_float2fix"},
+ {"011101"_b, "fcvtzu_64d_float2fix"},
+ {"110010"_b, "scvtf_h64_float2fix"},
+ {"110011"_b, "ucvtf_h64_float2fix"},
+ {"111100"_b, "fcvtzs_64h_float2fix"},
+ {"111101"_b, "fcvtzu_64h_float2fix"},
+ },
+ },
+
+ { "_kstltt",
+ {18, 17, 12},
+ { {"0x0"_b, "ld3_asisdlsop_dx3_r3d"},
+ {"100"_b, "ld3_asisdlsop_dx3_r3d"},
+ {"110"_b, "ld3_asisdlsop_d3_i3d"},
+ },
+ },
+
+ { "_ksvxxm",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "pacizb_64z_dp_1src"},
+ },
+ },
+
+ { "_ktnjrx",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"000xxxx"_b, "fnmadd_s_floatdp3"},
+ {"001xxxx"_b, "fnmadd_d_floatdp3"},
+ {"011xxxx"_b, "fnmadd_h_floatdp3"},
+ {"10001x0"_b, "fmls_asisdelem_rh_h"},
+ {"10x0101"_b, "shl_asisdshf_r"},
+ {"10x1101"_b, "sqshl_asisdshf_r"},
+ {"11x01x0"_b, "fmls_asisdelem_r_sd"},
+ {"1xx11x0"_b, "sqdmlsl_asisdelem_l"},
+ },
+ },
+
+ { "_ktrkrp",
+ {17},
+ { {"0"_b, "st3_asisdlso_h3_3h"},
+ },
+ },
+
+ { "_ktyppm",
+ {11, 10},
+ { {"00"_b, "asr_z_zw"},
+ {"01"_b, "lsr_z_zw"},
+ {"11"_b, "lsl_z_zw"},
+ },
+ },
+
+ { "_kvgjzh",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_d_floatimm"},
+ },
+ },
+
+ { "_kvmrng",
+ {23, 22},
+ { {"00"_b, "tbl_asimdtbl_l1_1"},
+ },
+ },
+
+ { "_kvnqhn",
+ {22, 20, 11},
+ { {"000"_b, "sqincw_r_rs_sx"},
+ {"001"_b, "sqdecw_r_rs_sx"},
+ {"010"_b, "sqincw_r_rs_x"},
+ {"011"_b, "sqdecw_r_rs_x"},
+ {"100"_b, "sqincd_r_rs_sx"},
+ {"101"_b, "sqdecd_r_rs_sx"},
+ {"110"_b, "sqincd_r_rs_x"},
+ {"111"_b, "sqdecd_r_rs_x"},
+ },
+ },
+
+ { "_kvyysq",
+ {12, 9, 8, 7, 6, 5},
+ { {"100000"_b, "_sjrqth"},
+ },
+ },
+
+ { "_kxhjtk",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_s_floatimm"},
+ },
+ },
+
+ { "_kxjgsz",
+ {23, 22, 20, 19, 11},
+ { {"00000"_b, "movi_asimdimm_m_sm"},
+ },
+ },
+
+ { "_kxkyqr",
+ {17},
+ { {"0"_b, "ld4_asisdlsop_hx4_r4h"},
+ {"1"_b, "ld4_asisdlsop_h4_i4h"},
+ },
+ },
+
+ { "_kxprqm",
+ {13, 12, 11, 10},
+ { {"0000"_b, "raddhn_asimddiff_n"},
+ {"0001"_b, "ushl_asimdsame_only"},
+ {"0010"_b, "_mmknzp"},
+ {"0011"_b, "uqshl_asimdsame_only"},
+ {"0100"_b, "uabal_asimddiff_l"},
+ {"0101"_b, "urshl_asimdsame_only"},
+ {"0110"_b, "_glgrjy"},
+ {"0111"_b, "uqrshl_asimdsame_only"},
+ {"1000"_b, "rsubhn_asimddiff_n"},
+ {"1001"_b, "umax_asimdsame_only"},
+ {"1010"_b, "_pxlnhs"},
+ {"1011"_b, "umin_asimdsame_only"},
+ {"1100"_b, "uabdl_asimddiff_l"},
+ {"1101"_b, "uabd_asimdsame_only"},
+ {"1110"_b, "_jkqktg"},
+ {"1111"_b, "uaba_asimdsame_only"},
+ },
+ },
+
+ { "_kxsysq",
+ {30},
+ { {"0"_b, "tbnz_only_testbranch"},
+ },
+ },
+
+ { "_kxvvkq",
+ {30, 23, 13},
+ { {"000"_b, "ld1b_z_p_bz_s_x32_unscaled"},
+ {"001"_b, "ldff1b_z_p_bz_s_x32_unscaled"},
+ {"010"_b, "ld1h_z_p_bz_s_x32_unscaled"},
+ {"011"_b, "ldff1h_z_p_bz_s_x32_unscaled"},
+ {"100"_b, "ld1b_z_p_bz_d_x32_unscaled"},
+ {"101"_b, "ldff1b_z_p_bz_d_x32_unscaled"},
+ {"110"_b, "ld1h_z_p_bz_d_x32_unscaled"},
+ {"111"_b, "ldff1h_z_p_bz_d_x32_unscaled"},
+ },
+ },
+
+ { "_kyjxrr",
+ {30, 13},
+ { {"00"_b, "_qtxpky"},
+ {"01"_b, "_hnjrmp"},
+ {"11"_b, "_vzjvtv"},
+ },
+ },
+
+ { "_kykymg",
+ {30},
+ { {"1"_b, "_rsyhtj"},
+ },
+ },
+
+ { "_kypqpy",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1010000"_b, "sm3partw1_vvv4_cryptosha512_3"},
+ {"1010001"_b, "sm3partw2_vvv4_cryptosha512_3"},
+ {"1010010"_b, "sm4ekey_vvv4_cryptosha512_3"},
+ },
+ },
+
+ { "_kyspnn",
+ {22},
+ { {"0"_b, "sqdmullb_z_zzi_s"},
+ {"1"_b, "sqdmullb_z_zzi_d"},
+ },
+ },
+
+ { "_kyxqgg",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"0000000"_b, "stgm_64bulk_ldsttags"},
+ },
+ },
+
+ { "_kyxrqg",
{10},
- { {"0", "VisitSVEBroadcastIndexElement"},
+ { {"0"_b, "uabalb_z_zzz"},
+ {"1"_b, "uabalt_z_zzz"},
+ },
+ },
+
+ { "_kyygzs",
+ {19},
+ { {"0"_b, "_nnkyzr"},
+ {"1"_b, "sys_cr_systeminstrs"},
},
},
- { "UnallocSVETableLookup",
+ { "_kyyzks",
+ {13, 12},
+ { {"00"_b, "sdiv_32_dp_2src"},
+ {"10"_b, "rorv_32_dp_2src"},
+ },
+ },
+
+ { "_kzmvpk",
+ {23, 22, 10},
+ { {"100"_b, "smlalb_z_zzzi_s"},
+ {"101"_b, "smlalt_z_zzzi_s"},
+ {"110"_b, "smlalb_z_zzzi_d"},
+ {"111"_b, "smlalt_z_zzzi_d"},
+ },
+ },
+
+ { "_kzrklp",
+ {17},
+ { {"0"_b, "ld4_asisdlso_b4_4b"},
+ },
+ },
+
+ { "_lgglzy",
+ {30, 23, 22, 19, 16},
+ { {"10010"_b, "aesimc_b_cryptoaes"},
+ {"x0x01"_b, "fcvtl_asimdmisc_l"},
+ {"xxx00"_b, "sqabs_asimdmisc_r"},
+ },
+ },
+
+ { "_lhmlrj",
+ {30, 23, 22, 20, 19},
+ { {"0xxxx"_b, "bl_only_branch_imm"},
+ {"10001"_b, "sysl_rc_systeminstrs"},
+ {"1001x"_b, "mrs_rs_systemmove"},
+ },
+ },
+
+ { "_lhpgsn",
+ {13, 12, 10},
+ { {"000"_b, "sqdmulh_asisdelem_r"},
+ {"010"_b, "sqrdmulh_asisdelem_r"},
+ {"101"_b, "_mxkgnq"},
+ {"111"_b, "_sgnknz"},
+ },
+ },
+
+ { "_lhtyjq",
+ {23, 22, 20, 19, 18, 16, 13},
+ { {"0000000"_b, "_gskkxk"},
+ {"0000001"_b, "_ktrkrp"},
+ {"0100000"_b, "_nmtkjv"},
+ {"0100001"_b, "_kmkpnj"},
+ {"100xxx0"_b, "st1_asisdlsop_hx1_r1h"},
+ {"100xxx1"_b, "st3_asisdlsop_hx3_r3h"},
+ {"1010xx0"_b, "st1_asisdlsop_hx1_r1h"},
+ {"1010xx1"_b, "st3_asisdlsop_hx3_r3h"},
+ {"10110x0"_b, "st1_asisdlsop_hx1_r1h"},
+ {"10110x1"_b, "st3_asisdlsop_hx3_r3h"},
+ {"1011100"_b, "st1_asisdlsop_hx1_r1h"},
+ {"1011101"_b, "st3_asisdlsop_hx3_r3h"},
+ {"1011110"_b, "_mgmgqh"},
+ {"1011111"_b, "_gzylzp"},
+ {"110xxx0"_b, "ld1_asisdlsop_hx1_r1h"},
+ {"110xxx1"_b, "ld3_asisdlsop_hx3_r3h"},
+ {"1110xx0"_b, "ld1_asisdlsop_hx1_r1h"},
+ {"1110xx1"_b, "ld3_asisdlsop_hx3_r3h"},
+ {"11110x0"_b, "ld1_asisdlsop_hx1_r1h"},
+ {"11110x1"_b, "ld3_asisdlsop_hx3_r3h"},
+ {"1111100"_b, "ld1_asisdlsop_hx1_r1h"},
+ {"1111101"_b, "ld3_asisdlsop_hx3_r3h"},
+ {"1111110"_b, "_mrkkps"},
+ {"1111111"_b, "_xygxsv"},
+ },
+ },
+
+ { "_lhvtrp",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "orr_asimdimm_l_hl"},
+ {"00x100"_b, "sqshrn_asimdshf_n"},
+ {"00x101"_b, "sqrshrn_asimdshf_n"},
+ {"010x00"_b, "sqshrn_asimdshf_n"},
+ {"010x01"_b, "sqrshrn_asimdshf_n"},
+ {"011100"_b, "sqshrn_asimdshf_n"},
+ {"011101"_b, "sqrshrn_asimdshf_n"},
+ {"0x1000"_b, "sqshrn_asimdshf_n"},
+ {"0x1001"_b, "sqrshrn_asimdshf_n"},
+ },
+ },
+
+ { "_ljhtkq",
+ {20, 19, 18, 17, 16, 13, 12, 11},
+ { {"00000000"_b, "_yvyxkx"},
+ },
+ },
+
+ { "_ljljkv",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0001100"_b, "and_z_zz"},
+ {"0001110"_b, "eor3_z_zzz"},
+ {"0001111"_b, "bsl_z_zzz"},
+ {"0011100"_b, "orr_z_zz"},
+ {"0011110"_b, "bcax_z_zzz"},
+ {"0011111"_b, "bsl1n_z_zzz"},
+ {"0101100"_b, "eor_z_zz"},
+ {"0101111"_b, "bsl2n_z_zzz"},
+ {"0111100"_b, "bic_z_zz"},
+ {"0111111"_b, "nbsl_z_zzz"},
+ {"0xx0000"_b, "add_z_zz"},
+ {"0xx0001"_b, "sub_z_zz"},
+ {"0xx0100"_b, "sqadd_z_zz"},
+ {"0xx0101"_b, "uqadd_z_zz"},
+ {"0xx0110"_b, "sqsub_z_zz"},
+ {"0xx0111"_b, "uqsub_z_zz"},
+ {"0xx1101"_b, "xar_z_zzi"},
+ {"10x0010"_b, "mla_z_zzzi_h"},
+ {"10x0011"_b, "mls_z_zzzi_h"},
+ {"10x0100"_b, "sqrdmlah_z_zzzi_h"},
+ {"10x0101"_b, "sqrdmlsh_z_zzzi_h"},
+ {"1100000"_b, "sdot_z_zzzi_s"},
+ {"1100001"_b, "udot_z_zzzi_s"},
+ {"1100010"_b, "mla_z_zzzi_s"},
+ {"1100011"_b, "mls_z_zzzi_s"},
+ {"1100100"_b, "sqrdmlah_z_zzzi_s"},
+ {"1100101"_b, "sqrdmlsh_z_zzzi_s"},
+ {"1100110"_b, "usdot_z_zzzi_s"},
+ {"1100111"_b, "sudot_z_zzzi_s"},
+ {"11010x0"_b, "sqdmlalb_z_zzzi_s"},
+ {"11010x1"_b, "sqdmlalt_z_zzzi_s"},
+ {"11011x0"_b, "sqdmlslb_z_zzzi_s"},
+ {"11011x1"_b, "sqdmlslt_z_zzzi_s"},
+ {"1110000"_b, "sdot_z_zzzi_d"},
+ {"1110001"_b, "udot_z_zzzi_d"},
+ {"1110010"_b, "mla_z_zzzi_d"},
+ {"1110011"_b, "mls_z_zzzi_d"},
+ {"1110100"_b, "sqrdmlah_z_zzzi_d"},
+ {"1110101"_b, "sqrdmlsh_z_zzzi_d"},
+ {"11110x0"_b, "sqdmlalb_z_zzzi_d"},
+ {"11110x1"_b, "sqdmlalt_z_zzzi_d"},
+ {"11111x0"_b, "sqdmlslb_z_zzzi_d"},
+ {"11111x1"_b, "sqdmlslt_z_zzzi_d"},
+ },
+ },
+
+ { "_ljxhnq",
+ {12},
+ { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
+ },
+ },
+
+ { "_lkttgy",
{10},
- { {"0", "VisitSVETableLookup"},
+ { {"0"_b, "saba_z_zzz"},
+ {"1"_b, "uaba_z_zzz"},
},
},
- { "UnallocSVEBroadcastGeneralRegister",
- {17, 16, 10},
- { {"000", "VisitSVEBroadcastGeneralRegister"},
+ { "_lkvynm",
+ {22, 20, 19, 13, 12},
+ { {"0x100"_b, "ushr_asisdshf_r"},
+ {"0x101"_b, "usra_asisdshf_r"},
+ {"0x110"_b, "urshr_asisdshf_r"},
+ {"0x111"_b, "ursra_asisdshf_r"},
+ {"10x00"_b, "ushr_asisdshf_r"},
+ {"10x01"_b, "usra_asisdshf_r"},
+ {"10x10"_b, "urshr_asisdshf_r"},
+ {"10x11"_b, "ursra_asisdshf_r"},
+ {"11100"_b, "ushr_asisdshf_r"},
+ {"11101"_b, "usra_asisdshf_r"},
+ {"11110"_b, "urshr_asisdshf_r"},
+ {"11111"_b, "ursra_asisdshf_r"},
+ {"x1000"_b, "ushr_asisdshf_r"},
+ {"x1001"_b, "usra_asisdshf_r"},
+ {"x1010"_b, "urshr_asisdshf_r"},
+ {"x1011"_b, "ursra_asisdshf_r"},
},
},
- { "UnallocSVEInsertGeneralRegister",
- {17, 16, 10},
- { {"000", "VisitSVEInsertGeneralRegister"},
+ { "_lkxgjy",
+ {23, 22},
+ { {"10"_b, "cmla_z_zzzi_h"},
+ {"11"_b, "cmla_z_zzzi_s"},
},
},
- { "UnallocSVEUnpackVectorElements",
+ { "_llnzlv",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "sqneg_asisdmisc_r"},
+ },
+ },
+
+ { "_llpsqq",
+ {13, 12, 10},
+ { {"001"_b, "_zjjxjl"},
+ {"100"_b, "ptrues_p_s"},
+ {"110"_b, "_njngkk"},
+ },
+ },
+
+ { "_llqjlh",
{10},
- { {"0", "VisitSVEUnpackVectorElements"},
+ { {"0"_b, "_lhtyjq"},
},
},
- { "UnallocSVEInsertSIMDFPScalarRegister",
- {17, 16, 10},
- { {"000", "VisitSVEInsertSIMDFPScalarRegister"},
+ { "_llvrrk",
+ {23, 18, 17, 16},
+ { {"0000"_b, "sqxtnb_z_zz"},
},
},
- { "UnallocSVEReverseVectorElements",
- {17, 16, 10},
- { {"000", "VisitSVEReverseVectorElements"},
+ { "_llxlqz",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "cmge_asisdmisc_z"},
},
},
- { "DecodeSVE00011001",
- {20, 19, 18, 12, 11},
- { {"xxx00", "UnallocSVEBroadcastIndexElement"},
- {"xxx10", "UnallocSVETableLookup"},
- {"00011", "UnallocSVEBroadcastGeneralRegister"},
- {"00111", "UnallocSVEInsertGeneralRegister"},
- {"10011", "UnallocSVEUnpackVectorElements"},
- {"10111", "UnallocSVEInsertSIMDFPScalarRegister"},
- {"11011", "UnallocSVEReverseVectorElements"},
+ { "_lmtnzv",
+ {12},
+ { {"0"_b, "st2_asisdlsop_dx2_r2d"},
},
},
- { "UnallocSVEPermutePredicateElements",
+ { "_lmyxhr",
{9, 4},
- { {"00", "VisitSVEPermutePredicateElements"},
+ { {"00"_b, "_gnqhsl"},
+ },
+ },
+
+ { "_lnjpjs",
+ {18, 17},
+ { {"0x"_b, "ld3_asisdlsop_sx3_r3s"},
+ {"10"_b, "ld3_asisdlsop_sx3_r3s"},
+ {"11"_b, "ld3_asisdlsop_s3_i3s"},
+ },
+ },
+
+ { "_lnkqjp",
+ {18, 17, 12},
+ { {"000"_b, "ld3_asisdlso_d3_3d"},
+ },
+ },
+
+ { "_lnnyzt",
+ {23, 22},
+ { {"01"_b, "fmax_asimdsamefp16_only"},
+ {"11"_b, "fmin_asimdsamefp16_only"},
+ },
+ },
+
+ { "_lnpvky",
+ {23, 22, 19, 13, 12},
+ { {"00100"_b, "sha1h_ss_cryptosha2"},
+ {"00101"_b, "sha1su1_vv_cryptosha2"},
+ {"00110"_b, "sha256su0_vv_cryptosha2"},
+ {"xx011"_b, "suqadd_asisdmisc_r"},
+ },
+ },
+
+ { "_lpkqzl",
+ {30, 23, 22, 12, 11, 10},
+ { {"0000xx"_b, "adds_64s_addsub_ext"},
+ {"000100"_b, "adds_64s_addsub_ext"},
+ {"1000xx"_b, "subs_64s_addsub_ext"},
+ {"100100"_b, "subs_64s_addsub_ext"},
+ },
+ },
+
+ { "_lpslrz",
+ {4, 3, 2, 1, 0},
+ { {"00000"_b, "fcmp_s_floatcmp"},
+ {"01000"_b, "fcmp_sz_floatcmp"},
+ {"10000"_b, "fcmpe_s_floatcmp"},
+ {"11000"_b, "fcmpe_sz_floatcmp"},
+ },
+ },
+
+ { "_lpsvyy",
+ {30, 13},
+ { {"00"_b, "_jlrrlt"},
+ {"01"_b, "_jrlynj"},
+ {"10"_b, "fmla_z_p_zzz"},
+ {"11"_b, "fmls_z_p_zzz"},
+ },
+ },
+
+ { "_lpsxhz",
+ {22, 20, 19, 18, 17, 16, 13, 12},
+ { {"01111101"_b, "ld64b_64l_memop"},
+ },
+ },
+
+ { "_lqmksm",
+ {30, 23, 22, 20, 13, 4},
+ { {"00001x"_b, "ld1row_z_p_bi_u32"},
+ {"000x0x"_b, "ld1row_z_p_br_contiguous"},
+ {"01001x"_b, "ld1rod_z_p_bi_u64"},
+ {"010x0x"_b, "ld1rod_z_p_br_contiguous"},
+ {"110x00"_b, "str_p_bi"},
+ },
+ },
+
+ { "_lqnvvj",
+ {22, 13, 12},
+ { {"000"_b, "swp_32_memop"},
+ {"100"_b, "swpl_32_memop"},
+ },
+ },
+
+ { "_lrjyhr",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "bic_asimdimm_l_hl"},
+ {"00x100"_b, "uqshrn_asimdshf_n"},
+ {"00x101"_b, "uqrshrn_asimdshf_n"},
+ {"010x00"_b, "uqshrn_asimdshf_n"},
+ {"010x01"_b, "uqrshrn_asimdshf_n"},
+ {"011100"_b, "uqshrn_asimdshf_n"},
+ {"011101"_b, "uqrshrn_asimdshf_n"},
+ {"0x1000"_b, "uqshrn_asimdshf_n"},
+ {"0x1001"_b, "uqrshrn_asimdshf_n"},
+ },
+ },
+
+ { "_lrntmz",
+ {13, 12, 11, 10},
+ { {"0000"_b, "saddlb_z_zz"},
+ {"0001"_b, "saddlt_z_zz"},
+ {"0010"_b, "uaddlb_z_zz"},
+ {"0011"_b, "uaddlt_z_zz"},
+ {"0100"_b, "ssublb_z_zz"},
+ {"0101"_b, "ssublt_z_zz"},
+ {"0110"_b, "usublb_z_zz"},
+ {"0111"_b, "usublt_z_zz"},
+ {"1100"_b, "sabdlb_z_zz"},
+ {"1101"_b, "sabdlt_z_zz"},
+ {"1110"_b, "uabdlb_z_zz"},
+ {"1111"_b, "uabdlt_z_zz"},
+ },
+ },
+
+ { "_lrqkvp",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0000000"_b, "ldadd_32_memop"},
+ {"0000100"_b, "ldclr_32_memop"},
+ {"0001000"_b, "ldeor_32_memop"},
+ {"0001100"_b, "ldset_32_memop"},
+ {"000xx10"_b, "str_32_ldst_regoff"},
+ {"0010000"_b, "ldaddl_32_memop"},
+ {"0010100"_b, "ldclrl_32_memop"},
+ {"0011000"_b, "ldeorl_32_memop"},
+ {"0011100"_b, "ldsetl_32_memop"},
+ {"001xx10"_b, "ldr_32_ldst_regoff"},
+ {"0100000"_b, "ldadda_32_memop"},
+ {"0100100"_b, "ldclra_32_memop"},
+ {"0101000"_b, "ldeora_32_memop"},
+ {"0101100"_b, "ldseta_32_memop"},
+ {"010xx10"_b, "ldrsw_64_ldst_regoff"},
+ {"0110000"_b, "ldaddal_32_memop"},
+ {"0110100"_b, "ldclral_32_memop"},
+ {"0111000"_b, "ldeoral_32_memop"},
+ {"0111100"_b, "ldsetal_32_memop"},
+ {"1000000"_b, "ldadd_64_memop"},
+ {"1000100"_b, "ldclr_64_memop"},
+ {"1001000"_b, "ldeor_64_memop"},
+ {"1001100"_b, "ldset_64_memop"},
+ {"100xx10"_b, "str_64_ldst_regoff"},
+ {"1010000"_b, "ldaddl_64_memop"},
+ {"1010100"_b, "ldclrl_64_memop"},
+ {"1011000"_b, "ldeorl_64_memop"},
+ {"1011100"_b, "ldsetl_64_memop"},
+ {"101xx10"_b, "ldr_64_ldst_regoff"},
+ {"10xxx01"_b, "ldraa_64_ldst_pac"},
+ {"10xxx11"_b, "ldraa_64w_ldst_pac"},
+ {"1100000"_b, "ldadda_64_memop"},
+ {"1100100"_b, "ldclra_64_memop"},
+ {"1101000"_b, "ldeora_64_memop"},
+ {"1101100"_b, "ldseta_64_memop"},
+ {"110xx10"_b, "prfm_p_ldst_regoff"},
+ {"1110000"_b, "ldaddal_64_memop"},
+ {"1110100"_b, "ldclral_64_memop"},
+ {"1111000"_b, "ldeoral_64_memop"},
+ {"1111100"_b, "ldsetal_64_memop"},
+ {"11xxx01"_b, "ldrab_64_ldst_pac"},
+ {"11xxx11"_b, "ldrab_64w_ldst_pac"},
+ },
+ },
+
+ { "_lspzrv",
+ {30, 23, 13},
+ { {"000"_b, "ld1sb_z_p_bz_s_x32_unscaled"},
+ {"001"_b, "ldff1sb_z_p_bz_s_x32_unscaled"},
+ {"010"_b, "ld1sh_z_p_bz_s_x32_unscaled"},
+ {"011"_b, "ldff1sh_z_p_bz_s_x32_unscaled"},
+ {"100"_b, "ld1sb_z_p_bz_d_x32_unscaled"},
+ {"101"_b, "ldff1sb_z_p_bz_d_x32_unscaled"},
+ {"110"_b, "ld1sh_z_p_bz_d_x32_unscaled"},
+ {"111"_b, "ldff1sh_z_p_bz_d_x32_unscaled"},
+ },
+ },
+
+ { "_ltvrrg",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_htmthz"},
+ },
+ },
+
+ { "_lvshqt",
+ {23, 22},
+ { {"00"_b, "_qtkpxg"},
+ },
+ },
+
+ { "_lxgltj",
+ {30, 23, 22},
+ { {"000"_b, "stlxr_sr32_ldstexcl"},
+ {"001"_b, "ldaxr_lr32_ldstexcl"},
+ {"010"_b, "stlr_sl32_ldstexcl"},
+ {"011"_b, "ldar_lr32_ldstexcl"},
+ {"100"_b, "stlxr_sr64_ldstexcl"},
+ {"101"_b, "ldaxr_lr64_ldstexcl"},
+ {"110"_b, "stlr_sl64_ldstexcl"},
+ {"111"_b, "ldar_lr64_ldstexcl"},
+ },
+ },
+
+ { "_lxhlkx",
+ {12, 11, 10},
+ { {"000"_b, "ftmad_z_zzi"},
+ },
+ },
+
+ { "_lxmyjh",
+ {30, 23, 11, 10},
+ { {"0000"_b, "_lqnvvj"},
+ {"0010"_b, "_tmthqm"},
+ {"0100"_b, "_rxjrmn"},
+ {"0110"_b, "_ypqgyp"},
+ {"1000"_b, "_zpsymj"},
+ {"1001"_b, "ldraa_64_ldst_pac"},
+ {"1010"_b, "_rsyzrs"},
+ {"1011"_b, "ldraa_64w_ldst_pac"},
+ {"1100"_b, "_nrrmtx"},
+ {"1101"_b, "ldrab_64_ldst_pac"},
+ {"1110"_b, "_tgqsyg"},
+ {"1111"_b, "ldrab_64w_ldst_pac"},
+ },
+ },
+
+ { "_lxqynh",
+ {23, 22, 19, 18, 17, 16},
+ { {"0000x1"_b, "dup_asimdins_dr_r"},
+ {"000x10"_b, "dup_asimdins_dr_r"},
+ {"0010xx"_b, "dup_asimdins_dr_r"},
+ {"001110"_b, "dup_asimdins_dr_r"},
+ {"00x10x"_b, "dup_asimdins_dr_r"},
+ {"00x111"_b, "dup_asimdins_dr_r"},
+ {"01xxxx"_b, "fmla_asimdsamefp16_only"},
+ {"11xxxx"_b, "fmls_asimdsamefp16_only"},
+ },
+ },
+
+ { "_lxvnxm",
+ {23, 22, 12},
+ { {"100"_b, "fmlsl2_asimdelem_lh"},
+ {"xx1"_b, "sqrdmlah_asimdelem_r"},
+ },
+ },
+
+ { "_lyghyg",
+ {20, 18, 17},
+ { {"000"_b, "_hxmjhn"},
+ },
+ },
+
+ { "_lylpyx",
+ {10},
+ { {"0"_b, "sabalb_z_zzz"},
+ {"1"_b, "sabalt_z_zzz"},
+ },
+ },
+
+ { "_lynsgm",
+ {13},
+ { {"0"_b, "_ttplgp"},
+ },
+ },
+
+ { "_lytkrx",
+ {12, 11, 10},
+ { {"000"_b, "dup_z_zi"},
+ {"010"_b, "tbl_z_zz_2"},
+ {"011"_b, "tbx_z_zz"},
+ {"100"_b, "tbl_z_zz_1"},
+ {"110"_b, "_ylnsvy"},
+ },
+ },
+
+ { "_lyzxhr",
+ {23, 22, 20, 19, 18, 17, 16, 13, 12, 11},
+ { {"0011111001"_b, "_smplhv"},
+ },
+ },
+
+ { "_lzpykk",
+ {30, 23, 22},
+ { {"000"_b, "bfm_32m_bitfield"},
+ },
+ },
+
+ { "_mgmgqh",
+ {17},
+ { {"0"_b, "st1_asisdlsop_hx1_r1h"},
+ {"1"_b, "st1_asisdlsop_h1_i1h"},
+ },
+ },
+
+ { "_mgmkyq",
+ {23},
+ { {"0"_b, "fmaxp_asimdsame_only"},
+ {"1"_b, "fminp_asimdsame_only"},
+ },
+ },
+
+ { "_mgqvvn",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "pacdza_64z_dp_1src"},
+ },
+ },
+
+ { "_mgsvlj",
+ {13, 12},
+ { {"00"_b, "udiv_32_dp_2src"},
+ {"10"_b, "asrv_32_dp_2src"},
+ },
+ },
+
+ { "_mhrjvp",
+ {30, 13},
+ { {"00"_b, "_vxhgzz"},
+ {"01"_b, "_lytkrx"},
+ {"10"_b, "_rlyvpn"},
+ {"11"_b, "_yvptvx"},
+ },
+ },
+
+ { "_mjqvxq",
+ {23, 22, 13, 12, 11, 10},
+ { {"0001x0"_b, "fmul_asimdelem_rh_h"},
+ {"0x0001"_b, "shrn_asimdshf_n"},
+ {"0x0011"_b, "rshrn_asimdshf_n"},
+ {"0x0101"_b, "sqshrn_asimdshf_n"},
+ {"0x0111"_b, "sqrshrn_asimdshf_n"},
+ {"0x1001"_b, "sshll_asimdshf_l"},
+ {"1x01x0"_b, "fmul_asimdelem_r_sd"},
+ {"xx00x0"_b, "mul_asimdelem_r"},
+ {"xx10x0"_b, "smull_asimdelem_l"},
+ {"xx11x0"_b, "sqdmull_asimdelem_l"},
+ },
+ },
+
+ { "_mjxzks",
+ {4},
+ { {"0"_b, "ccmp_64_condcmp_reg"},
+ },
+ },
+
+ { "_mkgsly",
+ {19, 18, 17, 16, 4},
+ { {"00000"_b, "brkas_p_p_p_z"},
+ {"10000"_b, "brkns_p_p_pp"},
+ },
+ },
+
+ { "_mkklrm",
+ {18, 17},
+ { {"00"_b, "ld3_asisdlso_s3_3s"},
+ },
+ },
+
+ { "_mkskxj",
+ {30, 23, 22, 13},
+ { {"0000"_b, "ld1sh_z_p_br_s32"},
+ {"0001"_b, "ldff1sh_z_p_br_s32"},
+ {"0010"_b, "ld1w_z_p_br_u64"},
+ {"0011"_b, "ldff1w_z_p_br_u64"},
+ {"0100"_b, "ld1sb_z_p_br_s32"},
+ {"0101"_b, "ldff1sb_z_p_br_s32"},
+ {"0110"_b, "ld1d_z_p_br_u64"},
+ {"0111"_b, "ldff1d_z_p_br_u64"},
+ {"1001"_b, "st2w_z_p_br_contiguous"},
+ {"1011"_b, "st4w_z_p_br_contiguous"},
+ {"10x0"_b, "st1w_z_p_br"},
+ {"1100"_b, "str_z_bi"},
+ {"1101"_b, "st2d_z_p_br_contiguous"},
+ {"1110"_b, "st1d_z_p_br"},
+ {"1111"_b, "st4d_z_p_br_contiguous"},
+ },
+ },
+
+ { "_mlnqrm",
+ {30},
+ { {"0"_b, "_nhzrqr"},
+ {"1"_b, "_zpmkvt"},
+ },
+ },
+
+ { "_mlvpxh",
+ {12},
+ { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
+ },
+ },
+
+ { "_mlxtxs",
+ {10},
+ { {"0"_b, "ssra_z_zi"},
+ {"1"_b, "usra_z_zi"},
+ },
+ },
+
+ { "_mlyynz",
+ {12},
+ { {"0"_b, "st3_asisdlsop_dx3_r3d"},
+ },
+ },
+
+ { "_mmhkmp",
+ {18, 17},
+ { {"0x"_b, "ld1_asisdlsop_sx1_r1s"},
+ {"10"_b, "ld1_asisdlsop_sx1_r1s"},
+ {"11"_b, "ld1_asisdlsop_s1_i1s"},
+ },
+ },
+
+ { "_mmknzp",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "clz_asimdmisc_r"},
+ {"00001"_b, "uqxtn_asimdmisc_n"},
+ },
+ },
+
+ { "_mmmjkx",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"0000000"_b, "rev_32_dp_1src"},
+ },
+ },
+
+ { "_mmrtvz",
+ {12},
+ { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+ },
+ },
+
+ { "_mnmtql",
+ {10},
+ { {"0"_b, "srsra_z_zi"},
+ {"1"_b, "ursra_z_zi"},
},
},
- { "UnallocSVEUnpackPredicateElements",
- {23, 22, 19, 17, 12, 11, 10, 9, 4},
- { {"000000000", "VisitSVEUnpackPredicateElements"},
+ { "_mnxmst",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "fcvtns_asimdmiscfp16_r"},
+ {"0x00001"_b, "fcvtns_asimdmisc_r"},
+ {"1111001"_b, "fcvtps_asimdmiscfp16_r"},
+ {"1x00001"_b, "fcvtps_asimdmisc_r"},
+ {"xx00000"_b, "cmlt_asimdmisc_z"},
+ {"xx10000"_b, "smaxv_asimdall_only"},
+ {"xx10001"_b, "sminv_asimdall_only"},
},
},
- { "UnallocSVEReversePredicateElements",
- {19, 17, 16, 12, 11, 10, 9, 4},
- { {"00000000", "VisitSVEReversePredicateElements"},
+ { "_mpgrgp",
+ {30, 22, 13, 12, 11, 10},
+ { {"000001"_b, "rmif_only_rmif"},
+ {"01xx00"_b, "ccmn_64_condcmp_reg"},
+ {"01xx10"_b, "ccmn_64_condcmp_imm"},
+ {"11xx00"_b, "ccmp_64_condcmp_reg"},
+ {"11xx10"_b, "ccmp_64_condcmp_imm"},
+ },
+ },
+
+ { "_mplgqv",
+ {11, 10},
+ { {"00"_b, "sm3tt1a_vvv4_crypto3_imm2"},
+ {"01"_b, "sm3tt1b_vvv4_crypto3_imm2"},
+ {"10"_b, "sm3tt2a_vvv4_crypto3_imm2"},
+ {"11"_b, "sm3tt2b_vvv_crypto3_imm2"},
+ },
+ },
+
+ { "_mplskr",
+ {13, 12},
+ { {"00"_b, "add_asisdsame_only"},
+ {"11"_b, "sqdmulh_asisdsame_only"},
+ },
+ },
+
+ { "_mpstrr",
+ {23, 22, 8, 7, 6, 5, 4, 3, 2, 1, 0},
+ { {"00000000000"_b, "setffr_f"},
+ },
+ },
+
+ { "_mpvsng",
+ {30},
+ { {"0"_b, "_vvtnrv"},
+ {"1"_b, "_yykhjv"},
+ },
+ },
+
+ { "_mpyhkm",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"000xxxx"_b, "fnmsub_s_floatdp3"},
+ {"001xxxx"_b, "fnmsub_d_floatdp3"},
+ {"011xxxx"_b, "fnmsub_h_floatdp3"},
+ {"10x1001"_b, "scvtf_asisdshf_c"},
+ {"10x1111"_b, "fcvtzs_asisdshf_c"},
+ {"1xx00x0"_b, "sqdmulh_asisdelem_r"},
+ {"1xx01x0"_b, "sqrdmulh_asisdelem_r"},
+ },
+ },
+
+ { "_mpyklp",
+ {23, 22, 20, 19, 16, 13, 10},
+ { {"0000000"_b, "_jqjnrv"},
+ {"0000001"_b, "_yqmqzp"},
+ {"0000010"_b, "_hgxqpp"},
+ {"0000011"_b, "_rvzhhx"},
+ {"0100000"_b, "_nnllqy"},
+ {"0100001"_b, "_vhmsgj"},
+ {"0100010"_b, "_mkklrm"},
+ {"0100011"_b, "_lnkqjp"},
+ {"100xx00"_b, "st1_asisdlsop_sx1_r1s"},
+ {"100xx01"_b, "_yxmkzr"},
+ {"100xx10"_b, "st3_asisdlsop_sx3_r3s"},
+ {"100xx11"_b, "_mlyynz"},
+ {"1010x00"_b, "st1_asisdlsop_sx1_r1s"},
+ {"1010x01"_b, "_jnjlsh"},
+ {"1010x10"_b, "st3_asisdlsop_sx3_r3s"},
+ {"1010x11"_b, "_svrnxq"},
+ {"1011000"_b, "st1_asisdlsop_sx1_r1s"},
+ {"1011001"_b, "_hjqtrt"},
+ {"1011010"_b, "st3_asisdlsop_sx3_r3s"},
+ {"1011011"_b, "_vqlytp"},
+ {"1011100"_b, "_qqpqnm"},
+ {"1011101"_b, "_thvvzp"},
+ {"1011110"_b, "_srglgl"},
+ {"1011111"_b, "_qzrjss"},
+ {"110xx00"_b, "ld1_asisdlsop_sx1_r1s"},
+ {"110xx01"_b, "_ljxhnq"},
+ {"110xx10"_b, "ld3_asisdlsop_sx3_r3s"},
+ {"110xx11"_b, "_nkrqgn"},
+ {"1110x00"_b, "ld1_asisdlsop_sx1_r1s"},
+ {"1110x01"_b, "_vmplgv"},
+ {"1110x10"_b, "ld3_asisdlsop_sx3_r3s"},
+ {"1110x11"_b, "_gsttpm"},
+ {"1111000"_b, "ld1_asisdlsop_sx1_r1s"},
+ {"1111001"_b, "_xmqvpl"},
+ {"1111010"_b, "ld3_asisdlsop_sx3_r3s"},
+ {"1111011"_b, "_stqmps"},
+ {"1111100"_b, "_mmhkmp"},
+ {"1111101"_b, "_srvnql"},
+ {"1111110"_b, "_lnjpjs"},
+ {"1111111"_b, "_kstltt"},
+ },
+ },
+
+ { "_mpzqxm",
+ {23, 22, 20, 19, 18, 16, 13},
+ { {"0000000"_b, "_vpkhvh"},
+ {"0000001"_b, "_gttglx"},
+ {"0100000"_b, "_gsgzpg"},
+ {"0100001"_b, "_ynqsgl"},
+ {"100xxx0"_b, "st2_asisdlsop_hx2_r2h"},
+ {"100xxx1"_b, "st4_asisdlsop_hx4_r4h"},
+ {"1010xx0"_b, "st2_asisdlsop_hx2_r2h"},
+ {"1010xx1"_b, "st4_asisdlsop_hx4_r4h"},
+ {"10110x0"_b, "st2_asisdlsop_hx2_r2h"},
+ {"10110x1"_b, "st4_asisdlsop_hx4_r4h"},
+ {"1011100"_b, "st2_asisdlsop_hx2_r2h"},
+ {"1011101"_b, "st4_asisdlsop_hx4_r4h"},
+ {"1011110"_b, "_sjsltg"},
+ {"1011111"_b, "_xrpmzt"},
+ {"110xxx0"_b, "ld2_asisdlsop_hx2_r2h"},
+ {"110xxx1"_b, "ld4_asisdlsop_hx4_r4h"},
+ {"1110xx0"_b, "ld2_asisdlsop_hx2_r2h"},
+ {"1110xx1"_b, "ld4_asisdlsop_hx4_r4h"},
+ {"11110x0"_b, "ld2_asisdlsop_hx2_r2h"},
+ {"11110x1"_b, "ld4_asisdlsop_hx4_r4h"},
+ {"1111100"_b, "ld2_asisdlsop_hx2_r2h"},
+ {"1111101"_b, "ld4_asisdlsop_hx4_r4h"},
+ {"1111110"_b, "_gygnsz"},
+ {"1111111"_b, "_kxkyqr"},
+ },
+ },
+
+ { "_mqgtsq",
+ {30, 23, 22, 19},
+ { {"1001"_b, "aesd_b_cryptoaes"},
+ {"xxx0"_b, "cnt_asimdmisc_r"},
+ },
+ },
+
+ { "_mqkjxj",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_lyzxhr"},
+ },
+ },
+
+ { "_mqrzzk",
+ {22, 20, 11},
+ { {"000"_b, "sqincw_z_zs"},
+ {"001"_b, "sqdecw_z_zs"},
+ {"010"_b, "incw_z_zs"},
+ {"100"_b, "sqincd_z_zs"},
+ {"101"_b, "sqdecd_z_zs"},
+ {"110"_b, "incd_z_zs"},
+ },
+ },
+
+ { "_mrhtxt",
+ {23, 22, 20, 9},
+ { {"0000"_b, "brkpb_p_p_pp"},
+ {"0100"_b, "brkpbs_p_p_pp"},
+ },
+ },
+
+ { "_mrkkps",
+ {17},
+ { {"0"_b, "ld1_asisdlsop_hx1_r1h"},
+ {"1"_b, "ld1_asisdlsop_h1_i1h"},
+ },
+ },
+
+ { "_mrmpgh",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"000xxxx"_b, "stlxp_sp32_ldstexcl"},
+ {"001xxxx"_b, "ldaxp_lp32_ldstexcl"},
+ {"0101111"_b, "casl_c32_ldstexcl"},
+ {"0111111"_b, "casal_c32_ldstexcl"},
+ {"100xxxx"_b, "stlxp_sp64_ldstexcl"},
+ {"101xxxx"_b, "ldaxp_lp64_ldstexcl"},
+ {"1101111"_b, "casl_c64_ldstexcl"},
+ {"1111111"_b, "casal_c64_ldstexcl"},
},
},
- { "DecodeSVE00011010",
- {20, 18},
- { {"0x", "UnallocSVEPermutePredicateElements"},
- {"10", "UnallocSVEUnpackPredicateElements"},
- {"11", "UnallocSVEReversePredicateElements"},
+ { "_mrqqlp",
+ {30, 11, 10},
+ { {"000"_b, "_gqykqv"},
+ {"001"_b, "_xgvgmk"},
+ {"010"_b, "_tjpjng"},
+ {"011"_b, "_pjkylt"},
+ {"101"_b, "_yrgnqz"},
+ {"110"_b, "_hhymvj"},
+ {"111"_b, "_xpmvjv"},
},
},
- { "DecodeSVE00011100",
+ { "_msgqps",
+ {18, 17},
+ { {"0x"_b, "ld2_asisdlsop_sx2_r2s"},
+ {"10"_b, "ld2_asisdlsop_sx2_r2s"},
+ {"11"_b, "ld2_asisdlsop_s2_i2s"},
+ },
+ },
+
+ { "_msnsjp",
{23, 20, 19, 18, 17, 16},
- { {"x00000", "VisitSVECopySIMDFPScalarRegisterToVector_Predicated"},
- {"x0001x", "VisitSVEExtractElementToSIMDFPScalarRegister"},
- {"x001xx", "VisitSVEReverseWithinElements"},
- {"x0100x", "VisitSVEConditionallyBroadcastElementToVector"},
- {"x0101x", "VisitSVEConditionallyExtractElementToSIMDFPScalar"},
- {"x01100", "VisitSVEVectorSplice_Destructive"},
- {"100001", "VisitSVECompressActiveElements"},
+ { {"000001"_b, "fcvtxn_asisdmisc_n"},
+ },
+ },
+
+ { "_msqkyy",
+ {16, 13, 12},
+ { {"000"_b, "rbit_64_dp_1src"},
+ {"001"_b, "clz_64_dp_1src"},
+ {"100"_b, "pacia_64p_dp_1src"},
+ {"101"_b, "autia_64p_dp_1src"},
+ {"110"_b, "_sqgxzn"},
+ {"111"_b, "_kqkhtz"},
+ },
+ },
+
+ { "_mstthg",
+ {13, 12, 11, 10},
+ { {"0000"_b, "umull_asimddiff_l"},
+ {"0001"_b, "_qptvrm"},
+ {"0010"_b, "_qqzrhz"},
+ {"0011"_b, "_yxhrpk"},
+ {"0101"_b, "_vsqpzr"},
+ {"0110"_b, "_kjrxpx"},
+ {"0111"_b, "_qnvgmh"},
+ {"1001"_b, "_jvhnxl"},
+ {"1010"_b, "_zyzzhm"},
+ {"1011"_b, "_slhpgp"},
+ {"1101"_b, "_mgmkyq"},
+ {"1110"_b, "_qvlytr"},
+ {"1111"_b, "_qtmjkr"},
+ },
+ },
+
+ { "_msztzv",
+ {23, 11, 10, 4, 3, 2, 1},
+ { {"0000000"_b, "_vvprhx"},
+ {"0101111"_b, "_nqysxy"},
+ {"0111111"_b, "_kkmjyr"},
+ {"1000000"_b, "_ypjyqh"},
+ },
+ },
+
+ { "_mtgksl",
+ {23, 22, 20, 19, 18, 16, 13},
+ { {"0000000"_b, "_vnrnmg"},
+ {"0000001"_b, "_hzllgl"},
+ {"0100000"_b, "_hrhzqy"},
+ {"0100001"_b, "_qtjzhs"},
+ {"100xxx0"_b, "st4_asisdlsep_r4_r"},
+ {"100xxx1"_b, "st1_asisdlsep_r4_r4"},
+ {"1010xx0"_b, "st4_asisdlsep_r4_r"},
+ {"1010xx1"_b, "st1_asisdlsep_r4_r4"},
+ {"10110x0"_b, "st4_asisdlsep_r4_r"},
+ {"10110x1"_b, "st1_asisdlsep_r4_r4"},
+ {"1011100"_b, "st4_asisdlsep_r4_r"},
+ {"1011101"_b, "st1_asisdlsep_r4_r4"},
+ {"1011110"_b, "_nzkhrj"},
+ {"1011111"_b, "_gmjhll"},
+ {"110xxx0"_b, "ld4_asisdlsep_r4_r"},
+ {"110xxx1"_b, "ld1_asisdlsep_r4_r4"},
+ {"1110xx0"_b, "ld4_asisdlsep_r4_r"},
+ {"1110xx1"_b, "ld1_asisdlsep_r4_r4"},
+ {"11110x0"_b, "ld4_asisdlsep_r4_r"},
+ {"11110x1"_b, "ld1_asisdlsep_r4_r4"},
+ {"1111100"_b, "ld4_asisdlsep_r4_r"},
+ {"1111101"_b, "ld1_asisdlsep_r4_r4"},
+ {"1111110"_b, "_hxglyp"},
+ {"1111111"_b, "_jmyslr"},
+ },
+ },
+
+ { "_mthzvm",
+ {30, 23, 13, 12, 11, 10},
+ { {"100001"_b, "ushr_asisdshf_r"},
+ {"100101"_b, "usra_asisdshf_r"},
+ {"101001"_b, "urshr_asisdshf_r"},
+ {"101101"_b, "ursra_asisdshf_r"},
+ },
+ },
+
+ { "_mtjrtt",
+ {13, 12},
+ { {"00"_b, "subps_64s_dp_2src"},
},
},
- { "DecodeSVE00011101",
+ { "_mtlhnl",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "movi_asimdimm_l_sl"},
+ {"00x100"_b, "sshr_asimdshf_r"},
+ {"00x110"_b, "srshr_asimdshf_r"},
+ {"010x00"_b, "sshr_asimdshf_r"},
+ {"010x10"_b, "srshr_asimdshf_r"},
+ {"011100"_b, "sshr_asimdshf_r"},
+ {"011110"_b, "srshr_asimdshf_r"},
+ {"0x1000"_b, "sshr_asimdshf_r"},
+ {"0x1010"_b, "srshr_asimdshf_r"},
+ },
+ },
+
+ { "_mtnpmr",
+ {13, 12, 11, 10},
+ { {"0000"_b, "smull_asimddiff_l"},
+ {"0001"_b, "_ypznsm"},
+ {"0010"_b, "_sgztlj"},
+ {"0011"_b, "_nsnyxt"},
+ {"0100"_b, "sqdmull_asimddiff_l"},
+ {"0101"_b, "_plltlx"},
+ {"0110"_b, "_qtystr"},
+ {"0111"_b, "_gymljg"},
+ {"1000"_b, "pmull_asimddiff_l"},
+ {"1001"_b, "_rpmrkq"},
+ {"1010"_b, "_hvvyhl"},
+ {"1011"_b, "_hlshjk"},
+ {"1101"_b, "_gmvjgn"},
+ {"1110"_b, "_rsyjqj"},
+ {"1111"_b, "_yvlhjg"},
+ },
+ },
+
+ { "_mtzgpn",
+ {30},
+ { {"0"_b, "cbz_32_compbranch"},
+ },
+ },
+
+ { "_mvglql",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "mvni_asimdimm_l_hl"},
+ {"00x100"_b, "sqshrun_asimdshf_n"},
+ {"00x101"_b, "sqrshrun_asimdshf_n"},
+ {"00x110"_b, "ushll_asimdshf_l"},
+ {"010x00"_b, "sqshrun_asimdshf_n"},
+ {"010x01"_b, "sqrshrun_asimdshf_n"},
+ {"010x10"_b, "ushll_asimdshf_l"},
+ {"011100"_b, "sqshrun_asimdshf_n"},
+ {"011101"_b, "sqrshrun_asimdshf_n"},
+ {"011110"_b, "ushll_asimdshf_l"},
+ {"0x1000"_b, "sqshrun_asimdshf_n"},
+ {"0x1001"_b, "sqrshrun_asimdshf_n"},
+ {"0x1010"_b, "ushll_asimdshf_l"},
+ },
+ },
+
+ { "_mvgsjr",
{20, 19, 18, 17, 16},
- { {"0000x", "VisitSVEExtractElementToGeneralRegister"},
- {"01000", "VisitSVECopyGeneralRegisterToVector_Predicated"},
- {"1000x", "VisitSVEConditionallyExtractElementToGeneralRegister"},
+ { {"00000"_b, "usqadd_asimdmisc_r"},
+ {"00001"_b, "shll_asimdmisc_s"},
+ {"10000"_b, "uaddlv_asimdall_only"},
},
},
- { "UnallocSVEPartitionBreakCondition",
- {18, 17, 16, 9},
- { {"0000", "VisitSVEPartitionBreakCondition"},
+ { "_mvzvpk",
+ {30},
+ { {"0"_b, "orn_64_log_shift"},
+ {"1"_b, "bics_64_log_shift"},
},
},
- { "UnallocSVEPropagateBreakToNextPartition",
- {23, 18, 17, 16, 9, 4},
- { {"000000", "VisitSVEPropagateBreakToNextPartition"},
+ { "_mxgykv",
+ {19, 18, 17, 16},
+ { {"0000"_b, "cntp_r_p_p"},
+ {"1000"_b, "_lynsgm"},
+ {"1001"_b, "_jxyskn"},
+ {"1010"_b, "_jmxstz"},
+ {"1011"_b, "_yjzknm"},
+ {"1100"_b, "_zmtkvx"},
+ {"1101"_b, "_yhmlxk"},
},
},
- { "DecodeSVE0011001x",
- {20, 19},
- { {"0x", "VisitSVEPredicateLogical"},
- {"10", "UnallocSVEPartitionBreakCondition"},
- {"11", "UnallocSVEPropagateBreakToNextPartition"},
+ { "_mxkgnq",
+ {23, 22, 20, 19, 11},
+ { {"00010"_b, "scvtf_asisdshf_c"},
+ {"001x0"_b, "scvtf_asisdshf_c"},
+ {"01xx0"_b, "scvtf_asisdshf_c"},
},
},
- { "UnallocSVEPredicateTest",
- {18, 17, 9, 4},
- { {"0000", "VisitSVEPredicateTest"},
+ { "_mxnzyr",
+ {19, 16},
+ { {"00"_b, "_nhxxmh"},
+ {"10"_b, "_qgymsy"},
+ {"11"_b, "_gjprmg"},
},
},
- { "UnallocSVEPredicateFirstActive",
- {18, 17, 12, 11, 10, 9, 4},
- { {"0000000", "VisitSVEPredicateFirstActive"},
+ { "_mxtskk",
+ {20, 19, 18, 17, 16, 13},
+ { {"000000"_b, "fmov_h_floatdp1"},
+ {"000010"_b, "fneg_h_floatdp1"},
+ {"000100"_b, "fcvt_sh_floatdp1"},
+ {"001000"_b, "frintn_h_floatdp1"},
+ {"001010"_b, "frintm_h_floatdp1"},
+ {"001100"_b, "frinta_h_floatdp1"},
+ {"001110"_b, "frintx_h_floatdp1"},
},
},
- { "UnallocSVEPredicateNextActive",
- {18, 17, 12, 11, 10, 9, 4},
- { {"0000100", "VisitSVEPredicateNextActive"},
+ { "_mxvjxx",
+ {20, 19, 18, 16},
+ { {"0000"_b, "_nshjhk"},
},
},
- { "DecodeSVE00110110",
- {20, 19, 16},
- { {"0xx", "VisitSVEPropagateBreak"},
- {"100", "UnallocSVEPredicateTest"},
- {"110", "UnallocSVEPredicateFirstActive"},
- {"111", "UnallocSVEPredicateNextActive"},
+ { "_myjqrl",
+ {22, 20, 19, 18, 17, 16},
+ { {"111000"_b, "fcmge_asisdmiscfp16_fz"},
+ {"x00000"_b, "fcmge_asisdmisc_fz"},
+ {"x10000"_b, "fminnmp_asisdpair_only_sd"},
},
},
- { "UnallocSVEPredicateTest",
- {18, 17, 9, 4},
- { {"0000", "VisitSVEPredicateTest"},
+ { "_mykjss",
+ {17},
+ { {"0"_b, "st2_asisdlsop_bx2_r2b"},
+ {"1"_b, "st2_asisdlsop_b2_i2b"},
},
},
- { "UnallocSVEPredicateInitialize",
- {18, 17, 11, 4},
- { {"0000", "VisitSVEPredicateInitialize"},
+ { "_mylphg",
+ {30, 13, 4},
+ { {"000"_b, "cmpge_p_p_zw"},
+ {"001"_b, "cmpgt_p_p_zw"},
+ {"010"_b, "cmplt_p_p_zw"},
+ {"011"_b, "cmple_p_p_zw"},
+ {"1xx"_b, "fcmla_z_p_zzz"},
},
},
- { "UnallocSVEPredicateZero",
- {18, 17, 11, 9, 8, 7, 6, 5, 4},
- { {"000000000", "VisitSVEPredicateZero"},
+ { "_myrshl",
+ {4},
+ { {"0"_b, "ccmn_32_condcmp_imm"},
},
},
- { "UnallocSVEPredicateReadFromFFR_Predicated",
- {18, 17, 11, 9, 4},
- { {"00000", "VisitSVEPredicateReadFromFFR_Predicated"},
+ { "_myxhpq",
+ {12},
+ { {"0"_b, "udot_asimdelem_d"},
+ {"1"_b, "sqrdmlsh_asimdelem_r"},
},
},
- { "UnallocSVEPredicateReadFromFFR_Unpredicated",
- {18, 17, 11, 9, 8, 7, 6, 5, 4},
- { {"000000000", "VisitSVEPredicateReadFromFFR_Unpredicated"},
+ { "_mzhsrq",
+ {4},
+ { {"0"_b, "cmplt_p_p_zi"},
+ {"1"_b, "cmple_p_p_zi"},
},
},
- { "DecodeSVE00110111",
- {20, 19, 16, 12, 10},
- { {"0xxxx", "VisitSVEPropagateBreak"},
- {"100xx", "UnallocSVEPredicateTest"},
- {"11x00", "UnallocSVEPredicateInitialize"},
- {"11001", "UnallocSVEPredicateZero"},
- {"11010", "UnallocSVEPredicateReadFromFFR_Predicated"},
- {"11110", "UnallocSVEPredicateReadFromFFR_Unpredicated"},
+ { "_mzqzhq",
+ {23, 22, 20, 19, 11},
+ { {"00000"_b, "mvni_asimdimm_m_sm"},
},
},
- { "UnallocSVEConditionallyTerminateScalars",
- {12, 11, 10, 3, 2, 1, 0},
- { {"0000000", "VisitSVEConditionallyTerminateScalars"},
+ { "_mzynlp",
+ {23, 22, 13},
+ { {"100"_b, "fmlal2_asimdelem_lh"},
+ {"xx1"_b, "umull_asimdelem_l"},
},
},
- { "UnallocSVEPredicateCount_2",
- {20},
- { {"0", "VisitSVEPredicateCount"},
+ { "_ngttyj",
+ {30, 23, 22, 13},
+ { {"0000"_b, "ld1b_z_p_br_u16"},
+ {"0001"_b, "ldff1b_z_p_br_u16"},
+ {"0010"_b, "ld1b_z_p_br_u64"},
+ {"0011"_b, "ldff1b_z_p_br_u64"},
+ {"0100"_b, "ld1h_z_p_br_u16"},
+ {"0101"_b, "ldff1h_z_p_br_u16"},
+ {"0110"_b, "ld1h_z_p_br_u64"},
+ {"0111"_b, "ldff1h_z_p_br_u64"},
+ {"1001"_b, "st2b_z_p_br_contiguous"},
+ {"1011"_b, "st4b_z_p_br_contiguous"},
+ {"10x0"_b, "st1b_z_p_br"},
+ {"1101"_b, "st2h_z_p_br_contiguous"},
+ {"1111"_b, "st4h_z_p_br_contiguous"},
+ {"11x0"_b, "st1h_z_p_br"},
},
},
- { "UnallocSVEIncDecByPredicateCount",
- {20},
- { {"0", "VisitSVEIncDecByPredicateCount"},
+ { "_ngxkmp",
+ {18, 17},
+ { {"0x"_b, "st3_asisdlsep_r3_r"},
+ {"10"_b, "st3_asisdlsep_r3_r"},
+ {"11"_b, "st3_asisdlsep_i3_i"},
},
},
- { "UnallocSVEFFRWriteFromPredicate",
- {20, 17, 16, 11, 10, 9, 4, 3, 2, 1, 0},
- { {"00000000000", "VisitSVEFFRWriteFromPredicate"},
+ { "_ngzyqj",
+ {11, 10},
+ { {"00"_b, "asr_z_zi"},
+ {"01"_b, "lsr_z_zi"},
+ {"11"_b, "lsl_z_zi"},
},
},
- { "UnallocSVEFFRInitialise",
- {20, 17, 16, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
- { {"000000000000000", "VisitSVEFFRInitialise"},
+ { "_nhhpqz",
+ {23, 22, 13, 12},
+ { {"0000"_b, "fmul_s_floatdp2"},
+ {"0001"_b, "fdiv_s_floatdp2"},
+ {"0010"_b, "fadd_s_floatdp2"},
+ {"0011"_b, "fsub_s_floatdp2"},
+ {"0100"_b, "fmul_d_floatdp2"},
+ {"0101"_b, "fdiv_d_floatdp2"},
+ {"0110"_b, "fadd_d_floatdp2"},
+ {"0111"_b, "fsub_d_floatdp2"},
+ {"1100"_b, "fmul_h_floatdp2"},
+ {"1101"_b, "fdiv_h_floatdp2"},
+ {"1110"_b, "fadd_h_floatdp2"},
+ {"1111"_b, "fsub_h_floatdp2"},
},
},
- { "DecodeSVE00111100",
- {19, 18, 12},
- { {"0xx", "UnallocSVEPredicateCount_2"},
- {"1x0", "UnallocSVEIncDecByPredicateCount"},
- {"101", "UnallocSVEFFRWriteFromPredicate"},
- {"111", "UnallocSVEFFRInitialise"},
+ { "_nhkstj",
+ {30, 23, 22},
+ { {"00x"_b, "add_64_addsub_shift"},
+ {"010"_b, "add_64_addsub_shift"},
+ {"10x"_b, "sub_64_addsub_shift"},
+ {"110"_b, "sub_64_addsub_shift"},
},
},
- { "UnallocSVEPredicateCount",
- {20, 19},
- { {"00", "VisitSVEPredicateCount"},
+ { "_nhxxmh",
+ {23, 22, 9, 3, 2, 1, 0},
+ { {"0100000"_b, "ptest_p_p"},
+ },
+ },
+
+ { "_nhzrqr",
+ {23, 22},
+ { {"00"_b, "fmadd_s_floatdp3"},
+ {"01"_b, "fmadd_d_floatdp3"},
+ {"11"_b, "fmadd_h_floatdp3"},
+ },
+ },
+
+ { "_nhzyvv",
+ {23, 22, 4, 3, 2, 1, 0},
+ { {"0000000"_b, "brk_ex_exception"},
+ {"0100000"_b, "tcancel_ex_exception"},
+ {"1000001"_b, "dcps1_dc_exception"},
+ {"1000010"_b, "dcps2_dc_exception"},
+ {"1000011"_b, "dcps3_dc_exception"},
+ },
+ },
+
+ { "_njgmvx",
+ {18, 17},
+ { {"00"_b, "_rzqzlq"},
},
},
- { "DecodeSVE0011111x",
- {20, 19, 16},
- { {"00x", "VisitSVEIntAddSubtractImm_Unpredicated"},
- {"01x", "VisitSVEIntMinMaxImm_Unpredicated"},
- {"10x", "VisitSVEIntMulImm_Unpredicated"},
- {"110", "VisitSVEBroadcastIntImm_Unpredicated"},
- {"111", "VisitSVEBroadcastFPImm_Unpredicated"},
+ { "_njgxlz",
+ {30},
+ { {"0"_b, "_txzxzs"},
+ {"1"_b, "_vprkpq"},
},
},
- { "UnallocSVEFPComplexAddition",
+ { "_njngkk",
+ {23, 22, 9, 8, 7, 6, 5},
+ { {"0000000"_b, "rdffr_p_f"},
+ },
+ },
+
+ { "_njtngm",
+ {13, 12, 10},
+ { {"001"_b, "_qkzlkj"},
+ {"010"_b, "_jvpqrp"},
+ {"011"_b, "_kknjng"},
+ {"101"_b, "_xmtlmj"},
+ {"110"_b, "sqdmlal_asisdelem_l"},
+ {"111"_b, "_zgjpym"},
+ },
+ },
+
+ { "_njvkjq",
+ {11, 10},
+ { {"00"_b, "index_z_ii"},
+ {"01"_b, "index_z_ri"},
+ {"10"_b, "index_z_ir"},
+ {"11"_b, "index_z_rr"},
+ },
+ },
+
+ { "_njxtpv",
+ {30, 23, 22, 11, 10, 4},
+ { {"001000"_b, "ccmn_32_condcmp_reg"},
+ {"001100"_b, "ccmn_32_condcmp_imm"},
+ {"101000"_b, "ccmp_32_condcmp_reg"},
+ {"101100"_b, "ccmp_32_condcmp_imm"},
+ },
+ },
+
+ { "_nkjgpq",
+ {23, 20, 19, 18, 17, 16, 13},
+ { {"0000000"_b, "ld1r_asisdlso_r1"},
+ {"0000001"_b, "ld3r_asisdlso_r3"},
+ {"10xxxx0"_b, "ld1r_asisdlsop_rx1_r"},
+ {"10xxxx1"_b, "ld3r_asisdlsop_rx3_r"},
+ {"110xxx0"_b, "ld1r_asisdlsop_rx1_r"},
+ {"110xxx1"_b, "ld3r_asisdlsop_rx3_r"},
+ {"1110xx0"_b, "ld1r_asisdlsop_rx1_r"},
+ {"1110xx1"_b, "ld3r_asisdlsop_rx3_r"},
+ {"11110x0"_b, "ld1r_asisdlsop_rx1_r"},
+ {"11110x1"_b, "ld3r_asisdlsop_rx3_r"},
+ {"1111100"_b, "ld1r_asisdlsop_rx1_r"},
+ {"1111101"_b, "ld3r_asisdlsop_rx3_r"},
+ {"1111110"_b, "ld1r_asisdlsop_r1_i"},
+ {"1111111"_b, "ld3r_asisdlsop_r3_i"},
+ },
+ },
+
+ { "_nkrqgn",
+ {12},
+ { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
+ },
+ },
+
+ { "_nkxhsy",
+ {22, 20, 11},
+ { {"000"_b, "cntb_r_s"},
+ {"010"_b, "incb_r_rs"},
+ {"100"_b, "cnth_r_s"},
+ {"110"_b, "inch_r_rs"},
+ },
+ },
+
+ { "_nlgqsk",
+ {30, 23, 13, 12, 11, 10},
+ { {"100001"_b, "sri_asisdshf_r"},
+ {"100101"_b, "sli_asisdshf_r"},
+ {"101001"_b, "sqshlu_asisdshf_r"},
+ {"101101"_b, "uqshl_asisdshf_r"},
+ },
+ },
+
+ { "_nlkkyx",
+ {23, 13, 12},
+ { {"001"_b, "fmulx_asisdsame_only"},
+ {"011"_b, "frecps_asisdsame_only"},
+ {"111"_b, "frsqrts_asisdsame_only"},
+ },
+ },
+
+ { "_nllnsg",
+ {30, 23, 22, 19, 16},
+ { {"10010"_b, "aesmc_b_cryptoaes"},
+ {"x0x01"_b, "fcvtn_asimdmisc_n"},
+ {"x1001"_b, "bfcvtn_asimdmisc_4s"},
+ {"xxx00"_b, "sadalp_asimdmisc_p"},
+ },
+ },
+
+ { "_nlpmvl",
+ {30, 13},
+ { {"00"_b, "mad_z_p_zzz"},
+ {"01"_b, "msb_z_p_zzz"},
+ },
+ },
+
+ { "_nlqglq",
+ {13, 10},
+ { {"00"_b, "_lxvnxm"},
+ {"01"_b, "_mzqzhq"},
+ {"10"_b, "_myxhpq"},
+ {"11"_b, "_pslllp"},
+ },
+ },
+
+ { "_nlyntn",
+ {23, 22, 20, 19, 11},
+ { {"00000"_b, "movi_asimdimm_l_sl"},
+ },
+ },
+
+ { "_nmkqzt",
{20, 19, 18, 17},
- { {"0000", "VisitSVEFPComplexAddition"},
+ { {"0000"_b, "_nvqlyn"},
+ },
+ },
+
+ { "_nmtkjv",
+ {17},
+ { {"0"_b, "ld1_asisdlso_h1_1h"},
+ },
+ },
+
+ { "_nmzyvt",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0000000"_b, "ldsmaxb_32_memop"},
+ {"0000100"_b, "ldsminb_32_memop"},
+ {"0000x10"_b, "strb_32b_ldst_regoff"},
+ {"0001000"_b, "ldumaxb_32_memop"},
+ {"0001100"_b, "lduminb_32_memop"},
+ {"0001x10"_b, "strb_32bl_ldst_regoff"},
+ {"0010000"_b, "ldsmaxlb_32_memop"},
+ {"0010100"_b, "ldsminlb_32_memop"},
+ {"0010x10"_b, "ldrb_32b_ldst_regoff"},
+ {"0011000"_b, "ldumaxlb_32_memop"},
+ {"0011100"_b, "lduminlb_32_memop"},
+ {"0011x10"_b, "ldrb_32bl_ldst_regoff"},
+ {"0100000"_b, "ldsmaxab_32_memop"},
+ {"0100100"_b, "ldsminab_32_memop"},
+ {"0100x10"_b, "ldrsb_64b_ldst_regoff"},
+ {"0101000"_b, "ldumaxab_32_memop"},
+ {"0101100"_b, "lduminab_32_memop"},
+ {"0101x10"_b, "ldrsb_64bl_ldst_regoff"},
+ {"0110000"_b, "ldsmaxalb_32_memop"},
+ {"0110100"_b, "ldsminalb_32_memop"},
+ {"0110x10"_b, "ldrsb_32b_ldst_regoff"},
+ {"0111000"_b, "ldumaxalb_32_memop"},
+ {"0111100"_b, "lduminalb_32_memop"},
+ {"0111x10"_b, "ldrsb_32bl_ldst_regoff"},
+ {"1000000"_b, "ldsmaxh_32_memop"},
+ {"1000100"_b, "ldsminh_32_memop"},
+ {"1001000"_b, "ldumaxh_32_memop"},
+ {"1001100"_b, "lduminh_32_memop"},
+ {"100xx10"_b, "strh_32_ldst_regoff"},
+ {"1010000"_b, "ldsmaxlh_32_memop"},
+ {"1010100"_b, "ldsminlh_32_memop"},
+ {"1011000"_b, "ldumaxlh_32_memop"},
+ {"1011100"_b, "lduminlh_32_memop"},
+ {"101xx10"_b, "ldrh_32_ldst_regoff"},
+ {"1100000"_b, "ldsmaxah_32_memop"},
+ {"1100100"_b, "ldsminah_32_memop"},
+ {"1101000"_b, "ldumaxah_32_memop"},
+ {"1101100"_b, "lduminah_32_memop"},
+ {"110xx10"_b, "ldrsh_64_ldst_regoff"},
+ {"1110000"_b, "ldsmaxalh_32_memop"},
+ {"1110100"_b, "ldsminalh_32_memop"},
+ {"1111000"_b, "ldumaxalh_32_memop"},
+ {"1111100"_b, "lduminalh_32_memop"},
+ {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+ },
+ },
+
+ { "_nnhprs",
+ {1, 0},
+ { {"00"_b, "ret_64r_branch_reg"},
+ },
+ },
+
+ { "_nnkxgr",
+ {11, 10},
+ { {"00"_b, "ftssel_z_zz"},
+ {"10"_b, "_yhlntp"},
+ {"11"_b, "_rsqmgk"},
},
},
- { "DecodeSVE01101000",
- {12, 11},
- { {"00", "VisitSVEFPMulAddIndex"},
- {"1x", "VisitSVEFPComplexMulAddIndex"},
+ { "_nnkyzr",
+ {18, 17, 16},
+ { {"011"_b, "_yvgqjx"},
},
},
- { "UnallocSVEFPMulIndex",
- {12, 11, 10},
- { {"000", "VisitSVEFPMulIndex"},
+ { "_nnllqy",
+ {18, 17},
+ { {"00"_b, "ld1_asisdlso_s1_1s"},
},
},
- { "DecodeSVE01110001",
- {20, 19, 12},
- { {"00x", "VisitSVEFPFastReduction"},
- {"011", "VisitSVEFPUnaryOpUnpredicated"},
- {"10x", "VisitSVEFPCompareWithZero"},
- {"11x", "VisitSVEFPAccumulatingReduction"},
+ { "_nnlvqz",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_d_floatimm"},
},
},
- { "UnallocSVEFPTrigMulAddCoefficient",
- {12, 11, 10},
- { {"000", "VisitSVEFPTrigMulAddCoefficient"},
+ { "_nnzhgm",
+ {19, 18, 17, 16, 4},
+ { {"0000x"_b, "brka_p_p_p"},
+ {"10000"_b, "brkn_p_p_pp"},
},
},
- { "UnallocSVEFPArithmeticWithImm_Predicated",
- {9, 8, 7, 6},
- { {"0000", "VisitSVEFPArithmeticWithImm_Predicated"},
+ { "_nqgqjh",
+ {30, 23, 22, 20, 19},
+ { {"0xxxx"_b, "bl_only_branch_imm"},
+ {"10001"_b, "sys_cr_systeminstrs"},
+ {"1001x"_b, "msr_sr_systemmove"},
},
},
- { "DecodeSVE01110100",
- {20, 19},
- { {"0x", "VisitSVEFPArithmetic_Predicated"},
- {"10", "UnallocSVEFPTrigMulAddCoefficient"},
- {"11", "UnallocSVEFPArithmeticWithImm_Predicated"},
+ { "_nqkhrv",
+ {30, 13},
+ { {"10"_b, "fnmla_z_p_zzz"},
+ {"11"_b, "fnmls_z_p_zzz"},
},
},
- { "DecodeSVE01110101",
- {20, 19, 18},
- { {"00x", "VisitSVEFPRoundToIntegralValue"},
- {"010", "VisitSVEFPConvertPrecision"},
- {"011", "VisitSVEFPUnaryOp"},
- {"10x", "VisitSVEIntConvertToFP"},
- {"11x", "VisitSVEFPConvertToInt"},
+ { "_nqlgtn",
+ {23, 20, 19, 18, 17, 16, 13},
+ { {"0000000"_b, "ld2r_asisdlso_r2"},
+ {"0000001"_b, "ld4r_asisdlso_r4"},
+ {"10xxxx0"_b, "ld2r_asisdlsop_rx2_r"},
+ {"10xxxx1"_b, "ld4r_asisdlsop_rx4_r"},
+ {"110xxx0"_b, "ld2r_asisdlsop_rx2_r"},
+ {"110xxx1"_b, "ld4r_asisdlsop_rx4_r"},
+ {"1110xx0"_b, "ld2r_asisdlsop_rx2_r"},
+ {"1110xx1"_b, "ld4r_asisdlsop_rx4_r"},
+ {"11110x0"_b, "ld2r_asisdlsop_rx2_r"},
+ {"11110x1"_b, "ld4r_asisdlsop_rx4_r"},
+ {"1111100"_b, "ld2r_asisdlsop_rx2_r"},
+ {"1111101"_b, "ld4r_asisdlsop_rx4_r"},
+ {"1111110"_b, "ld2r_asisdlsop_r2_i"},
+ {"1111111"_b, "ld4r_asisdlsop_r4_i"},
},
},
- { "UnallocSVELoadAndBroadcastElement",
- {22},
- { {"1", "VisitSVELoadAndBroadcastElement"},
+ { "_nqysxy",
+ {0},
+ { {"1"_b, "blraaz_64_branch_reg"},
},
},
- { "DecodeSVE100x0110",
- {22, 4},
- { {"00", "VisitSVEContiguousPrefetch_ScalarPlusScalar"},
- {"1x", "VisitSVELoadAndBroadcastElement"},
+ { "_nrrmtx",
+ {22, 13, 12},
+ { {"000"_b, "swpa_64_memop"},
+ {"100"_b, "swpal_64_memop"},
},
},
- { "DecodeSVE100x0111",
- {22, 4},
- { {"00", "VisitSVE32BitGatherPrefetch_VectorPlusImm"},
- {"1x", "VisitSVELoadAndBroadcastElement"},
+ { "_nrssjz",
+ {17},
+ { {"0"_b, "ld3_asisdlso_b3_3b"},
},
},
- { "DecodeSVE100x11xx",
- {22},
- { {"0", "VisitSVE32BitGatherLoad_VectorPlusImm"},
- {"1", "VisitSVELoadAndBroadcastElement"},
+ { "_nshjhk",
+ {17, 9, 8, 7, 6, 5},
+ { {"000000"_b, "aesimc_z_z"},
+ {"1xxxxx"_b, "aesd_z_zz"},
},
},
- { "DecodeSVE100010xx",
- {23, 4},
- { {"00", "VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets"},
- {"1x", "VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets"},
+ { "_nsjhhg",
+ {30, 13},
+ { {"00"_b, "_jhllmn"},
+ {"01"_b, "_htplsj"},
+ {"10"_b, "_rztvnl"},
+ {"11"_b, "_vgtnjh"},
},
},
- { "DecodeSVE100100x1",
- {23, 22, 4},
- { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
- {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ { "_nsnyxt",
+ {23},
+ { {"0"_b, "fmla_asimdsame_only"},
+ {"1"_b, "fmls_asimdsame_only"},
},
},
- { "DecodeSVE10010000",
- {23, 22, 4},
- { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
- {"100", "VisitSVELoadPredicateRegister"},
- {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ { "_nssrnm",
+ {20, 18, 17, 16},
+ { {"0000"_b, "_lnpvky"},
},
},
- { "DecodeSVE10010010",
- {23, 22, 4},
- { {"0xx", "VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets"},
- {"10x", "VisitSVELoadVectorRegister"},
- {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ { "_nszhhy",
+ {17},
+ { {"0"_b, "ld2_asisdlsep_r2_r"},
+ {"1"_b, "ld2_asisdlsep_i2_i"},
},
},
- { "DecodeSVE100110x1",
- {23, 22, 4},
- { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"},
- {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ { "_nthvqx",
+ {23, 22},
+ { {"00"_b, "eor_asimdsame_only"},
+ {"01"_b, "bsl_asimdsame_only"},
+ {"10"_b, "bit_asimdsame_only"},
+ {"11"_b, "bif_asimdsame_only"},
},
},
- { "DecodeSVE10011000",
- {23, 22, 4},
- { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"},
- {"100", "VisitSVELoadPredicateRegister"},
- {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ { "_ntjpsx",
+ {22, 20, 11},
+ { {"000"_b, "uqincb_r_rs_uw"},
+ {"001"_b, "uqdecb_r_rs_uw"},
+ {"010"_b, "uqincb_r_rs_x"},
+ {"011"_b, "uqdecb_r_rs_x"},
+ {"100"_b, "uqinch_r_rs_uw"},
+ {"101"_b, "uqdech_r_rs_uw"},
+ {"110"_b, "uqinch_r_rs_x"},
+ {"111"_b, "uqdech_r_rs_x"},
},
},
- { "DecodeSVE10011010",
- {23, 22, 4},
- { {"0xx", "VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets"},
- {"10x", "VisitSVELoadVectorRegister"},
- {"110", "VisitSVEContiguousPrefetch_ScalarPlusImm"},
+ { "_ntkhsm",
+ {13, 12},
+ { {"00"_b, "cmtst_asisdsame_only"},
+ },
+ },
+
+ { "_ntkqhk",
+ {11, 10, 9, 8, 7, 6},
+ { {"000000"_b, "yield_hi_hints"},
+ {"000001"_b, "wfi_hi_hints"},
+ {"000010"_b, "sevl_hi_hints"},
+ {"000011"_b, "xpaclri_hi_hints"},
+ {"001000"_b, "psb_hc_hints"},
+ {"0010x1"_b, "hint_hm_hints"},
+ {"001100"_b, "paciasp_hi_hints"},
+ {"001101"_b, "pacibsp_hi_hints"},
+ {"001110"_b, "autiasp_hi_hints"},
+ {"001111"_b, "autibsp_hi_hints"},
+ {"0x01xx"_b, "hint_hm_hints"},
+ {"0x1010"_b, "hint_hm_hints"},
+ {"10x0xx"_b, "hint_hm_hints"},
+ {"10x1xx"_b, "hint_hm_hints"},
+ {"1101xx"_b, "hint_hm_hints"},
+ {"111010"_b, "hint_hm_hints"},
+ {"x100xx"_b, "hint_hm_hints"},
+ {"x1100x"_b, "hint_hm_hints"},
+ {"x11011"_b, "hint_hm_hints"},
+ {"x111xx"_b, "hint_hm_hints"},
+ },
+ },
+
+ { "_nvkthr",
+ {30, 13},
+ { {"00"_b, "_kjqynn"},
+ {"01"_b, "_jgyhrh"},
+ {"10"_b, "_jymnkk"},
+ {"11"_b, "_pqjjsh"},
+ },
+ },
+
+ { "_nvqlyn",
+ {16, 13, 12},
+ { {"000"_b, "rev_64_dp_1src"},
+ {"100"_b, "pacdb_64p_dp_1src"},
+ {"101"_b, "autdb_64p_dp_1src"},
+ {"110"_b, "_hhnjjk"},
+ {"111"_b, "_yvnjkr"},
+ },
+ },
+
+ { "_nvthzh",
+ {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5},
+ { {"000010011111"_b, "xpacd_64z_dp_1src"},
+ },
+ },
+
+ { "_nvyxmh",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "add_z_p_zz"},
+ {"00001"_b, "sub_z_p_zz"},
+ {"00011"_b, "subr_z_p_zz"},
+ {"01000"_b, "smax_z_p_zz"},
+ {"01001"_b, "umax_z_p_zz"},
+ {"01010"_b, "smin_z_p_zz"},
+ {"01011"_b, "umin_z_p_zz"},
+ {"01100"_b, "sabd_z_p_zz"},
+ {"01101"_b, "uabd_z_p_zz"},
+ {"10000"_b, "mul_z_p_zz"},
+ {"10010"_b, "smulh_z_p_zz"},
+ {"10011"_b, "umulh_z_p_zz"},
+ {"10100"_b, "sdiv_z_p_zz"},
+ {"10101"_b, "udiv_z_p_zz"},
+ {"10110"_b, "sdivr_z_p_zz"},
+ {"10111"_b, "udivr_z_p_zz"},
+ {"11000"_b, "orr_z_p_zz"},
+ {"11001"_b, "eor_z_p_zz"},
+ {"11010"_b, "and_z_p_zz"},
+ {"11011"_b, "bic_z_p_zz"},
+ },
+ },
+
+ { "_nxjgmm",
+ {17},
+ { {"0"_b, "st3_asisdlsop_bx3_r3b"},
+ {"1"_b, "st3_asisdlsop_b3_i3b"},
+ },
+ },
+
+ { "_nxjkqs",
+ {23, 22, 12, 11, 10},
+ { {"0x000"_b, "fmla_z_zzzi_h"},
+ {"0x001"_b, "fmls_z_zzzi_h"},
+ {"10000"_b, "fmla_z_zzzi_s"},
+ {"10001"_b, "fmls_z_zzzi_s"},
+ {"101xx"_b, "fcmla_z_zzzi_h"},
+ {"11000"_b, "fmla_z_zzzi_d"},
+ {"11001"_b, "fmls_z_zzzi_d"},
+ {"111xx"_b, "fcmla_z_zzzi_s"},
+ },
+ },
+
+ { "_nxmjvy",
+ {30, 23, 11, 10},
+ { {"1001"_b, "_jksztq"},
+ },
+ },
+
+ { "_nxqygl",
+ {13},
+ { {"0"_b, "mla_asimdelem_r"},
+ {"1"_b, "umlal_asimdelem_l"},
+ },
+ },
+
+ { "_nxyhyv",
+ {30, 11, 10},
+ { {"000"_b, "_kvyysq"},
+ {"001"_b, "_rvjzgt"},
+ {"010"_b, "_vjlnqj"},
+ {"011"_b, "_jvvzjq"},
+ {"100"_b, "_tzzhsk"},
+ {"101"_b, "_mplskr"},
+ {"110"_b, "_njgmvx"},
+ {"111"_b, "_ntkhsm"},
+ },
+ },
+
+ { "_nykvly",
+ {16, 13, 12},
+ { {"000"_b, "rev32_64_dp_1src"},
+ {"100"_b, "pacda_64p_dp_1src"},
+ {"101"_b, "autda_64p_dp_1src"},
+ {"110"_b, "_mgqvvn"},
+ {"111"_b, "_xvlnmy"},
+ },
+ },
+
+ { "_nyssqn",
+ {12},
+ { {"0"_b, "st2_asisdlsop_dx2_r2d"},
+ },
+ },
+
+ { "_nyxxks",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "suqadd_asimdmisc_r"},
+ {"10000"_b, "saddlv_asimdall_only"},
+ },
+ },
+
+ { "_nzkhrj",
+ {17},
+ { {"0"_b, "st4_asisdlsep_r4_r"},
+ {"1"_b, "st4_asisdlsep_i4_i"},
+ },
+ },
+
+ { "_nzqkky",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "rev32_asimdmisc_r"},
+ },
+ },
+
+ { "_pgjjsz",
+ {30, 13, 12, 11, 10},
+ { {"00000"_b, "_lmyxhr"},
+ {"00001"_b, "_tmhlvh"},
+ {"00010"_b, "_qvtxpr"},
+ {"00011"_b, "_ymkthj"},
+ {"00100"_b, "_rhmxyp"},
+ {"00101"_b, "_zryvjk"},
+ {"01000"_b, "zip1_z_zz"},
+ {"01001"_b, "zip2_z_zz"},
+ {"01010"_b, "uzp1_z_zz"},
+ {"01011"_b, "uzp2_z_zz"},
+ {"01100"_b, "trn1_z_zz"},
+ {"01101"_b, "trn2_z_zz"},
+ {"10000"_b, "_llvrrk"},
+ {"10001"_b, "_qyjvqr"},
+ {"10010"_b, "_tmtnkq"},
+ {"10011"_b, "_gpxltv"},
+ {"10100"_b, "_pnlnzt"},
+ {"10101"_b, "_pygvrr"},
+ {"11000"_b, "addhnb_z_zz"},
+ {"11001"_b, "addhnt_z_zz"},
+ {"11010"_b, "raddhnb_z_zz"},
+ {"11011"_b, "raddhnt_z_zz"},
+ {"11100"_b, "subhnb_z_zz"},
+ {"11101"_b, "subhnt_z_zz"},
+ {"11110"_b, "rsubhnb_z_zz"},
+ {"11111"_b, "rsubhnt_z_zz"},
+ },
+ },
+
+ { "_phthqj",
+ {30, 13},
+ { {"00"_b, "_sntyqy"},
+ {"01"_b, "_xhlhmh"},
+ {"10"_b, "_rtrlts"},
+ {"11"_b, "_jzkqhn"},
+ },
+ },
+
+ { "_phtnny",
+ {18, 17},
+ { {"0x"_b, "ld1_asisdlsep_r3_r3"},
+ {"10"_b, "ld1_asisdlsep_r3_r3"},
+ {"11"_b, "ld1_asisdlsep_i3_i3"},
+ },
+ },
+
+ { "_phvnqh",
+ {30},
+ { {"0"_b, "bic_32_log_shift"},
+ {"1"_b, "eon_32_log_shift"},
+ },
+ },
+
+ { "_phxkzh",
+ {17, 4},
+ { {"00"_b, "fcmlt_p_p_z0"},
+ {"01"_b, "fcmle_p_p_z0"},
+ {"10"_b, "fcmne_p_p_z0"},
+ },
+ },
+
+ { "_pjgkjs",
+ {18, 17},
+ { {"00"_b, "_mxnzyr"},
+ },
+ },
+
+ { "_pjkylt",
+ {23, 22},
+ { {"00"_b, "fcsel_s_floatsel"},
+ {"01"_b, "fcsel_d_floatsel"},
+ {"11"_b, "fcsel_h_floatsel"},
+ },
+ },
+
+ { "_plktrh",
+ {30, 23},
+ { {"00"_b, "adds_32s_addsub_imm"},
+ {"10"_b, "subs_32s_addsub_imm"},
+ },
+ },
+
+ { "_plltlx",
+ {23},
+ { {"0"_b, "fadd_asimdsame_only"},
+ {"1"_b, "fsub_asimdsame_only"},
+ },
+ },
+
+ { "_pmkxlj",
+ {17},
+ { {"0"_b, "st1_asisdlse_r2_2v"},
+ },
+ },
+
+ { "_pmrngh",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_snkqvp"},
+ },
+ },
+
+ { "_pnlnzt",
+ {23, 18, 17, 16},
+ { {"0000"_b, "sqxtunb_z_zz"},
+ },
+ },
+
+ { "_pnqxjg",
+ {4},
+ { {"0"_b, "ccmn_32_condcmp_reg"},
+ },
+ },
+
+ { "_pnxggm",
+ {4, 3, 2, 1, 0},
+ { {"00000"_b, "fcmp_d_floatcmp"},
+ {"01000"_b, "fcmp_dz_floatcmp"},
+ {"10000"_b, "fcmpe_d_floatcmp"},
+ {"11000"_b, "fcmpe_dz_floatcmp"},
+ },
+ },
+
+ { "_pnxgrg",
+ {30, 23, 22},
+ { {"000"_b, "madd_32a_dp_3src"},
+ },
+ },
+
+ { "_pnzphx",
+ {17},
+ { {"1"_b, "frecpe_z_z"},
+ },
+ },
+
+ { "_pphhym",
+ {30, 23, 22},
+ { {"00x"_b, "add_32_addsub_shift"},
+ {"010"_b, "add_32_addsub_shift"},
+ {"10x"_b, "sub_32_addsub_shift"},
+ {"110"_b, "sub_32_addsub_shift"},
+ },
+ },
+
+ { "_ppllxt",
+ {18, 17},
+ { {"00"_b, "ld1_asisdlse_r3_3v"},
+ },
+ },
+
+ { "_ppnssm",
+ {30, 13, 12},
+ { {"000"_b, "_ktyppm"},
+ {"001"_b, "_ngzyqj"},
+ {"010"_b, "_yxnslx"},
+ {"011"_b, "_nnkxgr"},
+ {"100"_b, "_kzmvpk"},
+ {"101"_b, "_thrxph"},
+ {"110"_b, "_kgpgly"},
+ {"111"_b, "_yppszx"},
+ },
+ },
+
+ { "_pppsmg",
+ {30},
+ { {"0"_b, "_xyhmgh"},
+ {"1"_b, "_rlrjxp"},
+ },
+ },
+
+ { "_ppqkym",
+ {30, 23, 22, 11, 10},
+ { {"10001"_b, "stg_64spost_ldsttags"},
+ {"10010"_b, "stg_64soffset_ldsttags"},
+ {"10011"_b, "stg_64spre_ldsttags"},
+ {"10100"_b, "ldg_64loffset_ldsttags"},
+ {"10101"_b, "stzg_64spost_ldsttags"},
+ {"10110"_b, "stzg_64soffset_ldsttags"},
+ {"10111"_b, "stzg_64spre_ldsttags"},
+ {"11001"_b, "st2g_64spost_ldsttags"},
+ {"11010"_b, "st2g_64soffset_ldsttags"},
+ {"11011"_b, "st2g_64spre_ldsttags"},
+ {"11101"_b, "stz2g_64spost_ldsttags"},
+ {"11110"_b, "stz2g_64soffset_ldsttags"},
+ {"11111"_b, "stz2g_64spre_ldsttags"},
+ },
+ },
+
+ { "_pqjjsh",
+ {23, 22, 12, 10},
+ { {"1000"_b, "fmlslb_z_zzzi_s"},
+ {"1001"_b, "fmlslt_z_zzzi_s"},
+ },
+ },
+
+ { "_pqpzkt",
+ {11, 10, 9, 8, 7, 6},
+ { {"000000"_b, "nop_hi_hints"},
+ {"000001"_b, "wfe_hi_hints"},
+ {"000010"_b, "sev_hi_hints"},
+ {"000011"_b, "dgh_hi_hints"},
+ {"000100"_b, "pacia1716_hi_hints"},
+ {"000101"_b, "pacib1716_hi_hints"},
+ {"000110"_b, "autia1716_hi_hints"},
+ {"000111"_b, "autib1716_hi_hints"},
+ {"001000"_b, "esb_hi_hints"},
+ {"001001"_b, "tsb_hc_hints"},
+ {"001010"_b, "csdb_hi_hints"},
+ {"001100"_b, "paciaz_hi_hints"},
+ {"001101"_b, "pacibz_hi_hints"},
+ {"001110"_b, "autiaz_hi_hints"},
+ {"001111"_b, "autibz_hi_hints"},
+ {"0100xx"_b, "bti_hb_hints"},
+ {"0x1011"_b, "hint_hm_hints"},
+ {"10x0xx"_b, "hint_hm_hints"},
+ {"10x1xx"_b, "hint_hm_hints"},
+ {"1100xx"_b, "hint_hm_hints"},
+ {"111011"_b, "hint_hm_hints"},
+ {"x1100x"_b, "hint_hm_hints"},
+ {"x11010"_b, "hint_hm_hints"},
+ {"x1x1xx"_b, "hint_hm_hints"},
+ },
+ },
+
+ { "_pqtjgx",
+ {23, 22, 13, 12, 11, 10},
+ { {"01x1x0"_b, "fcmla_asimdelem_c_h"},
+ {"0x0001"_b, "sri_asimdshf_r"},
+ {"0x0101"_b, "sli_asimdshf_r"},
+ {"0x1001"_b, "sqshlu_asimdshf_r"},
+ {"0x1101"_b, "uqshl_asimdshf_r"},
+ {"10x1x0"_b, "fcmla_asimdelem_c_s"},
+ {"xx00x0"_b, "mls_asimdelem_r"},
+ {"xx10x0"_b, "umlsl_asimdelem_l"},
+ },
+ },
+
+ { "_prkmty",
+ {23, 22, 9},
+ { {"000"_b, "brkpa_p_p_pp"},
+ {"010"_b, "brkpas_p_p_pp"},
+ },
+ },
+
+ { "_pslllp",
+ {30, 23, 22, 20, 19, 12, 11},
+ { {"0000000"_b, "movi_asimdimm_d_ds"},
+ {"1000000"_b, "movi_asimdimm_d2_d"},
+ {"1000010"_b, "fmov_asimdimm_d2_d"},
+ {"x00x100"_b, "ucvtf_asimdshf_c"},
+ {"x00x111"_b, "fcvtzu_asimdshf_c"},
+ {"x010x00"_b, "ucvtf_asimdshf_c"},
+ {"x010x11"_b, "fcvtzu_asimdshf_c"},
+ {"x011100"_b, "ucvtf_asimdshf_c"},
+ {"x011111"_b, "fcvtzu_asimdshf_c"},
+ {"x0x1000"_b, "ucvtf_asimdshf_c"},
+ {"x0x1011"_b, "fcvtzu_asimdshf_c"},
+ },
+ },
+
+ { "_psqpkp",
+ {17, 4},
+ { {"00"_b, "fcmge_p_p_z0"},
+ {"01"_b, "fcmgt_p_p_z0"},
+ {"10"_b, "fcmeq_p_p_z0"},
+ },
+ },
+
+ { "_ptjyqx",
+ {13},
+ { {"0"_b, "fcmuo_p_p_zz"},
+ },
+ },
+
+ { "_ptkrvg",
+ {12},
+ { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
+ },
+ },
+
+ { "_ptsjnr",
+ {30, 20, 19, 18, 17, 16, 13},
+ { {"0000000"_b, "asr_z_p_zi"},
+ {"0000010"_b, "lsr_z_p_zi"},
+ {"0000110"_b, "lsl_z_p_zi"},
+ {"0001000"_b, "asrd_z_p_zi"},
+ {"0001100"_b, "sqshl_z_p_zi"},
+ {"0001110"_b, "uqshl_z_p_zi"},
+ {"0011000"_b, "srshr_z_p_zi"},
+ {"0011010"_b, "urshr_z_p_zi"},
+ {"0011110"_b, "sqshlu_z_p_zi"},
+ {"0100000"_b, "asr_z_p_zz"},
+ {"0100001"_b, "sxtb_z_p_z"},
+ {"0100010"_b, "lsr_z_p_zz"},
+ {"0100011"_b, "uxtb_z_p_z"},
+ {"0100101"_b, "sxth_z_p_z"},
+ {"0100110"_b, "lsl_z_p_zz"},
+ {"0100111"_b, "uxth_z_p_z"},
+ {"0101000"_b, "asrr_z_p_zz"},
+ {"0101001"_b, "sxtw_z_p_z"},
+ {"0101010"_b, "lsrr_z_p_zz"},
+ {"0101011"_b, "uxtw_z_p_z"},
+ {"0101101"_b, "abs_z_p_z"},
+ {"0101110"_b, "lslr_z_p_zz"},
+ {"0101111"_b, "neg_z_p_z"},
+ {"0110000"_b, "asr_z_p_zw"},
+ {"0110001"_b, "cls_z_p_z"},
+ {"0110010"_b, "lsr_z_p_zw"},
+ {"0110011"_b, "clz_z_p_z"},
+ {"0110101"_b, "cnt_z_p_z"},
+ {"0110110"_b, "lsl_z_p_zw"},
+ {"0110111"_b, "cnot_z_p_z"},
+ {"0111001"_b, "fabs_z_p_z"},
+ {"0111011"_b, "fneg_z_p_z"},
+ {"0111101"_b, "not_z_p_z"},
+ {"1000001"_b, "urecpe_z_p_z"},
+ {"1000011"_b, "ursqrte_z_p_z"},
+ {"1000100"_b, "srshl_z_p_zz"},
+ {"1000110"_b, "urshl_z_p_zz"},
+ {"1001001"_b, "sadalp_z_p_z"},
+ {"1001011"_b, "uadalp_z_p_z"},
+ {"1001100"_b, "srshlr_z_p_zz"},
+ {"1001110"_b, "urshlr_z_p_zz"},
+ {"1010000"_b, "sqshl_z_p_zz"},
+ {"1010001"_b, "sqabs_z_p_z"},
+ {"1010010"_b, "uqshl_z_p_zz"},
+ {"1010011"_b, "sqneg_z_p_z"},
+ {"1010100"_b, "sqrshl_z_p_zz"},
+ {"1010110"_b, "uqrshl_z_p_zz"},
+ {"1011000"_b, "sqshlr_z_p_zz"},
+ {"1011010"_b, "uqshlr_z_p_zz"},
+ {"1011100"_b, "sqrshlr_z_p_zz"},
+ {"1011110"_b, "uqrshlr_z_p_zz"},
+ {"1100000"_b, "shadd_z_p_zz"},
+ {"1100010"_b, "uhadd_z_p_zz"},
+ {"1100011"_b, "addp_z_p_zz"},
+ {"1100100"_b, "shsub_z_p_zz"},
+ {"1100110"_b, "uhsub_z_p_zz"},
+ {"1101000"_b, "srhadd_z_p_zz"},
+ {"1101001"_b, "smaxp_z_p_zz"},
+ {"1101010"_b, "urhadd_z_p_zz"},
+ {"1101011"_b, "umaxp_z_p_zz"},
+ {"1101100"_b, "shsubr_z_p_zz"},
+ {"1101101"_b, "sminp_z_p_zz"},
+ {"1101110"_b, "uhsubr_z_p_zz"},
+ {"1101111"_b, "uminp_z_p_zz"},
+ {"1110000"_b, "sqadd_z_p_zz"},
+ {"1110010"_b, "uqadd_z_p_zz"},
+ {"1110100"_b, "sqsub_z_p_zz"},
+ {"1110110"_b, "uqsub_z_p_zz"},
+ {"1111000"_b, "suqadd_z_p_zz"},
+ {"1111010"_b, "usqadd_z_p_zz"},
+ {"1111100"_b, "sqsubr_z_p_zz"},
+ {"1111110"_b, "uqsubr_z_p_zz"},
+ },
+ },
+
+ { "_ptslzg",
+ {30, 23, 22, 13, 4},
+ { {"01000"_b, "ldr_p_bi"},
+ {"01100"_b, "prfb_i_p_bi_s"},
+ {"01110"_b, "prfh_i_p_bi_s"},
+ {"10x0x"_b, "ld1sw_z_p_bz_d_x32_scaled"},
+ {"10x1x"_b, "ldff1sw_z_p_bz_d_x32_scaled"},
+ },
+ },
+
+ { "_pvkmmv",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0000000"_b, "ldsmax_32_memop"},
+ {"0000100"_b, "ldsmin_32_memop"},
+ {"0001000"_b, "ldumax_32_memop"},
+ {"0001100"_b, "ldumin_32_memop"},
+ {"000xx10"_b, "str_32_ldst_regoff"},
+ {"0010000"_b, "ldsmaxl_32_memop"},
+ {"0010100"_b, "ldsminl_32_memop"},
+ {"0011000"_b, "ldumaxl_32_memop"},
+ {"0011100"_b, "lduminl_32_memop"},
+ {"001xx10"_b, "ldr_32_ldst_regoff"},
+ {"0100000"_b, "ldsmaxa_32_memop"},
+ {"0100100"_b, "ldsmina_32_memop"},
+ {"0101000"_b, "ldumaxa_32_memop"},
+ {"0101100"_b, "ldumina_32_memop"},
+ {"010xx10"_b, "ldrsw_64_ldst_regoff"},
+ {"0110000"_b, "ldsmaxal_32_memop"},
+ {"0110100"_b, "ldsminal_32_memop"},
+ {"0111000"_b, "ldumaxal_32_memop"},
+ {"0111100"_b, "lduminal_32_memop"},
+ {"1000000"_b, "ldsmax_64_memop"},
+ {"1000100"_b, "ldsmin_64_memop"},
+ {"1001000"_b, "ldumax_64_memop"},
+ {"1001100"_b, "ldumin_64_memop"},
+ {"100xx10"_b, "str_64_ldst_regoff"},
+ {"1010000"_b, "ldsmaxl_64_memop"},
+ {"1010100"_b, "ldsminl_64_memop"},
+ {"1011000"_b, "ldumaxl_64_memop"},
+ {"1011100"_b, "lduminl_64_memop"},
+ {"101xx10"_b, "ldr_64_ldst_regoff"},
+ {"10xxx01"_b, "ldraa_64_ldst_pac"},
+ {"10xxx11"_b, "ldraa_64w_ldst_pac"},
+ {"1100000"_b, "ldsmaxa_64_memop"},
+ {"1100100"_b, "ldsmina_64_memop"},
+ {"1101000"_b, "ldumaxa_64_memop"},
+ {"1101100"_b, "ldumina_64_memop"},
+ {"110xx10"_b, "prfm_p_ldst_regoff"},
+ {"1110000"_b, "ldsmaxal_64_memop"},
+ {"1110100"_b, "ldsminal_64_memop"},
+ {"1111000"_b, "ldumaxal_64_memop"},
+ {"1111100"_b, "lduminal_64_memop"},
+ {"11xxx01"_b, "ldrab_64_ldst_pac"},
+ {"11xxx11"_b, "ldrab_64w_ldst_pac"},
+ },
+ },
+
+ { "_pvrylp",
+ {13, 12},
+ { {"00"_b, "sbc_64_addsub_carry"},
+ },
+ },
+
+ { "_pxgztg",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "bic_asimdimm_l_sl"},
+ {"00x100"_b, "sli_asimdshf_r"},
+ {"00x110"_b, "uqshl_asimdshf_r"},
+ {"010x00"_b, "sli_asimdshf_r"},
+ {"010x10"_b, "uqshl_asimdshf_r"},
+ {"011100"_b, "sli_asimdshf_r"},
+ {"011110"_b, "uqshl_asimdshf_r"},
+ {"0x1000"_b, "sli_asimdshf_r"},
+ {"0x1010"_b, "uqshl_asimdshf_r"},
+ },
+ },
+
+ { "_pxkqxn",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "cmle_asisdmisc_z"},
+ },
+ },
+
+ { "_pxlnhs",
+ {23, 20, 19, 18, 17, 16},
+ { {"000001"_b, "fcvtxn_asimdmisc_n"},
+ {"x00000"_b, "uadalp_asimdmisc_p"},
+ },
+ },
+
+ { "_pxnnrz",
+ {20, 19, 18, 17, 16, 13, 12, 3, 2, 1, 0},
+ { {"00000001101"_b, "setf16_only_setf"},
+ },
+ },
+
+ { "_pxtsvn",
+ {20, 19, 18, 17, 16},
+ { {"10000"_b, "fminp_asisdpair_only_sd"},
+ },
+ },
+
+ { "_pxyrpm",
+ {22, 11},
+ { {"00"_b, "sqdmulh_z_zzi_s"},
+ {"01"_b, "mul_z_zzi_s"},
+ {"10"_b, "sqdmulh_z_zzi_d"},
+ {"11"_b, "mul_z_zzi_d"},
+ },
+ },
+
+ { "_pxzkjy",
+ {30},
+ { {"1"_b, "_yplktv"},
+ },
+ },
+
+ { "_pygvrr",
+ {23, 18, 17, 16},
+ { {"0000"_b, "sqxtunt_z_zz"},
+ },
+ },
+
+ { "_qghmks",
+ {13, 12},
+ { {"00"_b, "subp_64s_dp_2src"},
+ {"01"_b, "irg_64i_dp_2src"},
+ {"10"_b, "lslv_64_dp_2src"},
+ {"11"_b, "pacga_64p_dp_2src"},
+ },
+ },
+
+ { "_qgmngg",
+ {30, 23},
+ { {"00"_b, "orr_64_log_imm"},
+ {"10"_b, "ands_64s_log_imm"},
+ {"11"_b, "movk_64_movewide"},
+ },
+ },
+
+ { "_qgryzh",
+ {18, 17},
+ { {"0x"_b, "st1_asisdlsep_r3_r3"},
+ {"10"_b, "st1_asisdlsep_r3_r3"},
+ {"11"_b, "st1_asisdlsep_i3_i3"},
+ },
+ },
+
+ { "_qgymsy",
+ {11},
+ { {"0"_b, "_hmsgpj"},
+ },
+ },
+
+ { "_qhgtvk",
+ {30, 23, 22},
+ { {"00x"_b, "adds_32_addsub_shift"},
+ {"010"_b, "adds_32_addsub_shift"},
+ {"10x"_b, "subs_32_addsub_shift"},
+ {"110"_b, "subs_32_addsub_shift"},
+ },
+ },
+
+ { "_qhsplz",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "frintn_asimdmiscfp16_r"},
+ {"0x00001"_b, "frintn_asimdmisc_r"},
+ {"1111001"_b, "frintp_asimdmiscfp16_r"},
+ {"1x00001"_b, "frintp_asimdmisc_r"},
+ {"xx00000"_b, "cmgt_asimdmisc_z"},
+ },
+ },
+
+ { "_qhtqrj",
+ {30, 23, 22},
+ { {"000"_b, "stnp_s_ldstnapair_offs"},
+ {"001"_b, "ldnp_s_ldstnapair_offs"},
+ {"010"_b, "stp_s_ldstpair_post"},
+ {"011"_b, "ldp_s_ldstpair_post"},
+ {"100"_b, "stnp_d_ldstnapair_offs"},
+ {"101"_b, "ldnp_d_ldstnapair_offs"},
+ {"110"_b, "stp_d_ldstpair_post"},
+ {"111"_b, "ldp_d_ldstpair_post"},
+ },
+ },
+
+ { "_qhtrnn",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "stur_32_ldst_unscaled"},
+ {"00001"_b, "str_32_ldst_immpost"},
+ {"00010"_b, "sttr_32_ldst_unpriv"},
+ {"00011"_b, "str_32_ldst_immpre"},
+ {"00100"_b, "ldur_32_ldst_unscaled"},
+ {"00101"_b, "ldr_32_ldst_immpost"},
+ {"00110"_b, "ldtr_32_ldst_unpriv"},
+ {"00111"_b, "ldr_32_ldst_immpre"},
+ {"01000"_b, "ldursw_64_ldst_unscaled"},
+ {"01001"_b, "ldrsw_64_ldst_immpost"},
+ {"01010"_b, "ldtrsw_64_ldst_unpriv"},
+ {"01011"_b, "ldrsw_64_ldst_immpre"},
+ {"10000"_b, "stur_64_ldst_unscaled"},
+ {"10001"_b, "str_64_ldst_immpost"},
+ {"10010"_b, "sttr_64_ldst_unpriv"},
+ {"10011"_b, "str_64_ldst_immpre"},
+ {"10100"_b, "ldur_64_ldst_unscaled"},
+ {"10101"_b, "ldr_64_ldst_immpost"},
+ {"10110"_b, "ldtr_64_ldst_unpriv"},
+ {"10111"_b, "ldr_64_ldst_immpre"},
+ {"11000"_b, "prfum_p_ldst_unscaled"},
+ },
+ },
+
+ { "_qhxzxl",
+ {17},
+ { {"0"_b, "ld1_asisdlse_r2_2v"},
+ },
+ },
+
+ { "_qjyvln",
+ {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5},
+ { {"000010011111"_b, "xpaci_64z_dp_1src"},
},
},
- { "UnallocSVELoadAndBroadcastQuadword_ScalarPlusImm",
+ { "_qkyjhg",
+ {30},
+ { {"0"_b, "ldr_32_loadlit"},
+ {"1"_b, "ldr_64_loadlit"},
+ },
+ },
+
+ { "_qkzlkj",
+ {23, 22, 20, 19, 11},
+ { {"00010"_b, "sshr_asisdshf_r"},
+ {"001x0"_b, "sshr_asisdshf_r"},
+ {"01xx0"_b, "sshr_asisdshf_r"},
+ },
+ },
+
+ { "_qljhnp",
+ {22},
+ { {"0"_b, "sqdmullt_z_zzi_s"},
+ {"1"_b, "sqdmullt_z_zzi_d"},
+ },
+ },
+
+ { "_qlqhzg",
{20},
- { {"0", "VisitSVELoadAndBroadcastQuadword_ScalarPlusImm"},
+ { {"0"_b, "_hzmlps"},
+ {"1"_b, "msr_sr_systemmove"},
+ },
+ },
+
+ { "_qlxksl",
+ {30},
+ { {"0"_b, "_hrxyts"},
+ {"1"_b, "_tytvjk"},
+ },
+ },
+
+ { "_qmgtyq",
+ {17},
+ { {"0"_b, "ld2_asisdlse_r2"},
+ },
+ },
+
+ { "_qmjqhq",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_h_floatimm"},
+ },
+ },
+
+ { "_qmqmpj",
+ {12, 10},
+ { {"00"_b, "_nxqygl"},
+ {"01"_b, "_skglrt"},
+ {"10"_b, "_sjlpxn"},
+ {"11"_b, "_qzxvsk"},
+ },
+ },
+
+ { "_qmrgkn",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_hsvgnt"},
+ },
+ },
+
+ { "_qmzqsy",
+ {20, 19, 18, 17},
+ { {"0000"_b, "_nykvly"},
+ },
+ },
+
+ { "_qnprqt",
+ {4},
+ { {"0"_b, "eor_p_p_pp_z"},
+ {"1"_b, "sel_p_p_pp"},
+ },
+ },
+
+ { "_qnsxkj",
+ {20, 19, 18, 17, 16, 13},
+ { {"000000"_b, "fabs_d_floatdp1"},
+ {"000010"_b, "fsqrt_d_floatdp1"},
+ {"000110"_b, "fcvt_hd_floatdp1"},
+ {"001000"_b, "frintp_d_floatdp1"},
+ {"001010"_b, "frintz_d_floatdp1"},
+ {"001110"_b, "frinti_d_floatdp1"},
+ {"010000"_b, "frint32x_d_floatdp1"},
+ {"010010"_b, "frint64x_d_floatdp1"},
+ },
+ },
+
+ { "_qntssm",
+ {30, 11, 10},
+ { {"000"_b, "_hxrtsq"},
+ {"001"_b, "_ygxhyg"},
+ {"010"_b, "_nhhpqz"},
+ {"011"_b, "_vjymzn"},
+ {"101"_b, "_gszxkp"},
+ {"110"_b, "_nssrnm"},
+ {"111"_b, "_jrsptt"},
+ },
+ },
+
+ { "_qntygx",
+ {13, 12, 11, 10},
+ { {"0000"_b, "uaddl_asimddiff_l"},
+ {"0001"_b, "uhadd_asimdsame_only"},
+ {"0010"_b, "_nzqkky"},
+ {"0011"_b, "uqadd_asimdsame_only"},
+ {"0100"_b, "uaddw_asimddiff_w"},
+ {"0101"_b, "urhadd_asimdsame_only"},
+ {"0111"_b, "_nthvqx"},
+ {"1000"_b, "usubl_asimddiff_l"},
+ {"1001"_b, "uhsub_asimdsame_only"},
+ {"1010"_b, "_srmhlk"},
+ {"1011"_b, "uqsub_asimdsame_only"},
+ {"1100"_b, "usubw_asimddiff_w"},
+ {"1101"_b, "cmhi_asimdsame_only"},
+ {"1110"_b, "_mvgsjr"},
+ {"1111"_b, "cmhs_asimdsame_only"},
+ },
+ },
+
+ { "_qnvgmh",
+ {23},
+ { {"0"_b, "fmul_asimdsame_only"},
+ },
+ },
+
+ { "_qptvrm",
+ {23},
+ { {"0"_b, "fmaxnmp_asimdsame_only"},
+ {"1"_b, "fminnmp_asimdsame_only"},
},
},
- { "DecodeSVE101xx101",
+ { "_qpvgnh",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld2b_z_p_bi_contiguous"},
+ {"000x0"_b, "ld2b_z_p_br_contiguous"},
+ {"00101"_b, "ld4b_z_p_bi_contiguous"},
+ {"001x0"_b, "ld4b_z_p_br_contiguous"},
+ {"01001"_b, "ld2h_z_p_bi_contiguous"},
+ {"010x0"_b, "ld2h_z_p_br_contiguous"},
+ {"01101"_b, "ld4h_z_p_bi_contiguous"},
+ {"011x0"_b, "ld4h_z_p_br_contiguous"},
+ {"10011"_b, "st2b_z_p_bi_contiguous"},
+ {"10111"_b, "st4b_z_p_bi_contiguous"},
+ {"10x01"_b, "st1b_z_p_bi"},
+ {"11011"_b, "st2h_z_p_bi_contiguous"},
+ {"110x0"_b, "st1h_z_p_bz_d_x32_scaled"},
+ {"11111"_b, "st4h_z_p_bi_contiguous"},
+ {"111x0"_b, "st1h_z_p_bz_s_x32_scaled"},
+ {"11x01"_b, "st1h_z_p_bi"},
+ },
+ },
+
+ { "_qpzynz",
+ {23, 22},
+ { {"00"_b, "_jkpsxk"},
+ },
+ },
+
+ { "_qqpkkm",
+ {9, 8, 7, 6, 5, 1, 0},
+ { {"1111111"_b, "eretaa_64e_branch_reg"},
+ },
+ },
+
+ { "_qqpqnm",
+ {18, 17},
+ { {"0x"_b, "st1_asisdlsop_sx1_r1s"},
+ {"10"_b, "st1_asisdlsop_sx1_r1s"},
+ {"11"_b, "st1_asisdlsop_s1_i1s"},
+ },
+ },
+
+ { "_qqsmlt",
+ {4},
+ { {"0"_b, "ccmp_32_condcmp_imm"},
+ },
+ },
+
+ { "_qqtpln",
+ {17},
+ { {"0"_b, "ld1_asisdlsop_bx1_r1b"},
+ {"1"_b, "ld1_asisdlsop_b1_i1b"},
+ },
+ },
+
+ { "_qqyryl",
+ {30, 23, 22, 13, 4},
+ { {"00x0x"_b, "ld1w_z_p_bz_s_x32_unscaled"},
+ {"00x1x"_b, "ldff1w_z_p_bz_s_x32_unscaled"},
+ {"0100x"_b, "ldr_z_bi"},
+ {"01100"_b, "prfw_i_p_bi_s"},
+ {"01110"_b, "prfd_i_p_bi_s"},
+ {"10x0x"_b, "ld1w_z_p_bz_d_x32_unscaled"},
+ {"10x1x"_b, "ldff1w_z_p_bz_d_x32_unscaled"},
+ {"11x0x"_b, "ld1d_z_p_bz_d_x32_unscaled"},
+ {"11x1x"_b, "ldff1d_z_p_bz_d_x32_unscaled"},
+ },
+ },
+
+ { "_qqzrhz",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "fcvtau_asimdmiscfp16_r"},
+ {"0x00001"_b, "fcvtau_asimdmisc_r"},
+ {"0x10000"_b, "fmaxnmv_asimdall_only_sd"},
+ {"1111000"_b, "fcmge_asimdmiscfp16_fz"},
+ {"1x00000"_b, "fcmge_asimdmisc_fz"},
+ {"1x00001"_b, "ursqrte_asimdmisc_r"},
+ {"1x10000"_b, "fminnmv_asimdall_only_sd"},
+ },
+ },
+
+ { "_qrygny",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld1b_z_p_bi_u8"},
+ {"00011"_b, "ldnf1b_z_p_bi_u8"},
+ {"00101"_b, "ld1b_z_p_bi_u32"},
+ {"00111"_b, "ldnf1b_z_p_bi_u32"},
+ {"01001"_b, "ld1sw_z_p_bi_s64"},
+ {"01011"_b, "ldnf1sw_z_p_bi_s64"},
+ {"01101"_b, "ld1h_z_p_bi_u32"},
+ {"01111"_b, "ldnf1h_z_p_bi_u32"},
+ {"100x0"_b, "st1b_z_p_bz_d_x32_unscaled"},
+ {"100x1"_b, "st1b_z_p_bz_d_64_unscaled"},
+ {"101x0"_b, "st1b_z_p_bz_s_x32_unscaled"},
+ {"101x1"_b, "st1b_z_p_ai_d"},
+ {"110x0"_b, "st1h_z_p_bz_d_x32_unscaled"},
+ {"110x1"_b, "st1h_z_p_bz_d_64_unscaled"},
+ {"111x0"_b, "st1h_z_p_bz_s_x32_unscaled"},
+ {"111x1"_b, "st1h_z_p_ai_d"},
+ },
+ },
+
+ { "_qrykhm",
+ {12},
+ { {"0"_b, "st4_asisdlsop_dx4_r4d"},
+ },
+ },
+
+ { "_qsnqpz",
+ {18, 17},
+ { {"0x"_b, "ld4_asisdlsop_sx4_r4s"},
+ {"10"_b, "ld4_asisdlsop_sx4_r4s"},
+ {"11"_b, "ld4_asisdlsop_s4_i4s"},
+ },
+ },
+
+ { "_qsqqxg",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1010000"_b, "sha512h_qqv_cryptosha512_3"},
+ {"1010001"_b, "sha512h2_qqv_cryptosha512_3"},
+ {"1010010"_b, "sha512su1_vvv2_cryptosha512_3"},
+ {"1010011"_b, "rax1_vvv2_cryptosha512_3"},
+ },
+ },
+
+ { "_qsrlql",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"010xx00"_b, "csel_32_condsel"},
+ {"010xx01"_b, "csinc_32_condsel"},
+ {"0110000"_b, "crc32b_32c_dp_2src"},
+ {"0110001"_b, "crc32h_32c_dp_2src"},
+ {"0110010"_b, "crc32w_32c_dp_2src"},
+ {"0110100"_b, "crc32cb_32c_dp_2src"},
+ {"0110101"_b, "crc32ch_32c_dp_2src"},
+ {"0110110"_b, "crc32cw_32c_dp_2src"},
+ {"110xx00"_b, "csinv_32_condsel"},
+ {"110xx01"_b, "csneg_32_condsel"},
+ },
+ },
+
+ { "_qsrtzz",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_lvshqt"},
+ },
+ },
+
+ { "_qssyls",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"0000000"_b, "stzgm_64bulk_ldsttags"},
+ },
+ },
+
+ { "_qsxpyq",
+ {20, 19, 18, 17, 16, 13, 12, 4, 3, 2, 1, 0},
+ { {"000000001101"_b, "setf8_only_setf"},
+ },
+ },
+
+ { "_qsygjs",
+ {30, 23, 22, 12, 11, 10},
+ { {"0000xx"_b, "add_32_addsub_ext"},
+ {"000100"_b, "add_32_addsub_ext"},
+ {"1000xx"_b, "sub_32_addsub_ext"},
+ {"100100"_b, "sub_32_addsub_ext"},
+ },
+ },
+
+ { "_qtgvhn",
+ {17},
+ { {"0"_b, "ld4_asisdlsop_bx4_r4b"},
+ {"1"_b, "ld4_asisdlsop_b4_i4b"},
+ },
+ },
+
+ { "_qtjzhs",
+ {17},
+ { {"0"_b, "ld1_asisdlse_r4_4v"},
+ },
+ },
+
+ { "_qtknlp",
+ {30, 11, 10},
+ { {"000"_b, "_skpjrp"},
+ {"001"_b, "_sjnqvx"},
+ {"011"_b, "_rgnxpp"},
+ {"100"_b, "_rtlzxv"},
+ {"101"_b, "_zvlxrl"},
+ {"110"_b, "_ynnrny"},
+ {"111"_b, "_nlkkyx"},
+ },
+ },
+
+ { "_qtkpxg",
{20},
- { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"},
- {"1", "VisitSVEContiguousNonFaultLoad_ScalarPlusImm"},
+ { {"0"_b, "_srggzy"},
+ {"1"_b, "mrs_rs_systemmove"},
},
},
- { "DecodeSVE101x0110",
- {22},
- { {"0", "VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar"},
- {"1", "VisitSVELoadMultipleStructures_ScalarPlusScalar"},
+ { "_qtmjkr",
+ {23},
+ { {"0"_b, "fdiv_asimdsame_only"},
+ },
+ },
+
+ { "_qtxpky",
+ {4},
+ { {"0"_b, "cmphs_p_p_zi"},
+ {"1"_b, "cmphi_p_p_zi"},
+ },
+ },
+
+ { "_qtxypt",
+ {9, 8, 7, 6, 5, 1, 0},
+ { {"1111111"_b, "retab_64e_branch_reg"},
+ },
+ },
+
+ { "_qtystr",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "scvtf_asimdmiscfp16_r"},
+ {"0x00001"_b, "scvtf_asimdmisc_r"},
+ {"1111000"_b, "fcmeq_asimdmiscfp16_fz"},
+ {"1111001"_b, "frecpe_asimdmiscfp16_r"},
+ {"1x00000"_b, "fcmeq_asimdmisc_fz"},
+ {"1x00001"_b, "frecpe_asimdmisc_r"},
+ },
+ },
+
+ { "_qvlnll",
+ {22, 20, 11},
+ { {"010"_b, "decw_r_rs"},
+ {"110"_b, "decd_r_rs"},
+ },
+ },
+
+ { "_qvlytr",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0x00001"_b, "frint64x_asimdmisc_r"},
+ {"0x10000"_b, "fmaxv_asimdall_only_sd"},
+ {"1111000"_b, "fneg_asimdmiscfp16_r"},
+ {"1111001"_b, "fsqrt_asimdmiscfp16_r"},
+ {"1x00000"_b, "fneg_asimdmisc_r"},
+ {"1x00001"_b, "fsqrt_asimdmisc_r"},
+ {"1x10000"_b, "fminv_asimdall_only_sd"},
+ },
+ },
+
+ { "_qvsypn",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ldnt1w_z_p_bi_contiguous"},
+ {"000x0"_b, "ldnt1w_z_p_br_contiguous"},
+ {"00101"_b, "ld3w_z_p_bi_contiguous"},
+ {"001x0"_b, "ld3w_z_p_br_contiguous"},
+ {"01001"_b, "ldnt1d_z_p_bi_contiguous"},
+ {"010x0"_b, "ldnt1d_z_p_br_contiguous"},
+ {"01101"_b, "ld3d_z_p_bi_contiguous"},
+ {"011x0"_b, "ld3d_z_p_br_contiguous"},
+ {"10011"_b, "stnt1w_z_p_bi_contiguous"},
+ {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"},
+ {"10111"_b, "st3w_z_p_bi_contiguous"},
+ {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"},
+ {"10x01"_b, "st1w_z_p_bi"},
+ {"11011"_b, "stnt1d_z_p_bi_contiguous"},
+ {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"},
+ {"11111"_b, "st3d_z_p_bi_contiguous"},
+ {"11x01"_b, "st1d_z_p_bi"},
+ },
+ },
+
+ { "_qvtxpr",
+ {20, 9, 4},
+ { {"000"_b, "uzp1_p_pp"},
+ },
+ },
+
+ { "_qxrzgv",
+ {17},
+ { {"0"_b, "ld1_asisdlsep_r2_r2"},
+ {"1"_b, "ld1_asisdlsep_i2_i2"},
+ },
+ },
+
+ { "_qxtvzy",
+ {13, 12, 11, 10},
+ { {"0000"_b, "umlal_asimddiff_l"},
+ {"0001"_b, "sub_asimdsame_only"},
+ {"0010"_b, "_gznnvh"},
+ {"0011"_b, "cmeq_asimdsame_only"},
+ {"0101"_b, "mls_asimdsame_only"},
+ {"0110"_b, "_vsqlkr"},
+ {"0111"_b, "pmul_asimdsame_only"},
+ {"1000"_b, "umlsl_asimddiff_l"},
+ {"1001"_b, "umaxp_asimdsame_only"},
+ {"1010"_b, "_gggyqx"},
+ {"1011"_b, "uminp_asimdsame_only"},
+ {"1101"_b, "sqrdmulh_asimdsame_only"},
+ {"1110"_b, "_slnkst"},
+ },
+ },
+
+ { "_qyjvqr",
+ {23, 18, 17, 16},
+ { {"0000"_b, "sqxtnt_z_zz"},
+ },
+ },
+
+ { "_qytrjj",
+ {30, 23, 22},
+ { {"100"_b, "bcax_vvv16_crypto4"},
+ },
+ },
+
+ { "_qzjnpr",
+ {30, 23, 22, 20, 19, 18, 17, 16},
+ { {"00000000"_b, "udf_only_perm_undef"},
+ },
+ },
+
+ { "_qzrjss",
+ {18, 17, 12},
+ { {"0x0"_b, "st3_asisdlsop_dx3_r3d"},
+ {"100"_b, "st3_asisdlsop_dx3_r3d"},
+ {"110"_b, "st3_asisdlsop_d3_i3d"},
+ },
+ },
+
+ { "_qzsthq",
+ {30, 23, 22},
+ { {"000"_b, "strb_32_ldst_pos"},
+ {"001"_b, "ldrb_32_ldst_pos"},
+ {"010"_b, "ldrsb_64_ldst_pos"},
+ {"011"_b, "ldrsb_32_ldst_pos"},
+ {"100"_b, "strh_32_ldst_pos"},
+ {"101"_b, "ldrh_32_ldst_pos"},
+ {"110"_b, "ldrsh_64_ldst_pos"},
+ {"111"_b, "ldrsh_32_ldst_pos"},
+ },
+ },
+
+ { "_qzxvsk",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "bic_asimdimm_l_sl"},
+ {"00x100"_b, "usra_asimdshf_r"},
+ {"00x110"_b, "ursra_asimdshf_r"},
+ {"010x00"_b, "usra_asimdshf_r"},
+ {"010x10"_b, "ursra_asimdshf_r"},
+ {"011100"_b, "usra_asimdshf_r"},
+ {"011110"_b, "ursra_asimdshf_r"},
+ {"0x1000"_b, "usra_asimdshf_r"},
+ {"0x1010"_b, "ursra_asimdshf_r"},
+ },
+ },
+
+ { "_qzzlhq",
+ {30, 23, 22},
+ { {"000"_b, "and_32_log_imm"},
+ {"010"_b, "movn_32_movewide"},
+ {"100"_b, "eor_32_log_imm"},
+ {"110"_b, "movz_32_movewide"},
+ },
+ },
+
+ { "_qzzlpv",
+ {13, 12},
+ { {"01"_b, "gmi_64g_dp_2src"},
+ {"10"_b, "lsrv_64_dp_2src"},
+ },
+ },
+
+ { "_rgjqzs",
+ {30, 23, 22},
+ { {"001"_b, "sbfm_64m_bitfield"},
+ {"101"_b, "ubfm_64m_bitfield"},
+ },
+ },
+
+ { "_rgnxpp",
+ {23, 22},
+ { {"00"_b, "fcsel_s_floatsel"},
+ {"01"_b, "fcsel_d_floatsel"},
+ {"11"_b, "fcsel_h_floatsel"},
+ },
+ },
+
+ { "_rgztzl",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "saddlp_asimdmisc_p"},
+ {"00001"_b, "xtn_asimdmisc_n"},
+ },
+ },
+
+ { "_rhhrhg",
+ {30, 13, 4},
+ { {"000"_b, "cmphs_p_p_zw"},
+ {"001"_b, "cmphi_p_p_zw"},
+ {"010"_b, "cmplo_p_p_zw"},
+ {"011"_b, "cmpls_p_p_zw"},
+ },
+ },
+
+ { "_rhmxyp",
+ {20, 9, 4},
+ { {"000"_b, "trn1_p_pp"},
+ },
+ },
+
+ { "_rhpmjz",
+ {12, 11},
+ { {"00"_b, "incp_z_p_z"},
+ {"01"_b, "incp_r_p_r"},
+ {"10"_b, "_mpstrr"},
+ },
+ },
+
+ { "_rhttgj",
+ {12, 10},
+ { {"00"_b, "_xxpzrl"},
+ {"01"_b, "_vlzrlm"},
+ {"10"_b, "_vxylhh"},
+ {"11"_b, "_pxgztg"},
+ },
+ },
+
+ { "_rhvksm",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "fcvtnu_asisdmiscfp16_r"},
+ {"0x00001"_b, "fcvtnu_asisdmisc_r"},
+ {"1111001"_b, "fcvtpu_asisdmiscfp16_r"},
+ {"1x00001"_b, "fcvtpu_asisdmisc_r"},
+ },
+ },
+
+ { "_rhzhyz",
+ {13, 12, 4},
+ { {"000"_b, "rmif_only_rmif"},
+ },
+ },
+
+ { "_rjmyyl",
+ {20, 19, 18, 17, 16, 13},
+ { {"000000"_b, "fmov_s_floatdp1"},
+ {"000010"_b, "fneg_s_floatdp1"},
+ {"001000"_b, "frintn_s_floatdp1"},
+ {"001010"_b, "frintm_s_floatdp1"},
+ {"001100"_b, "frinta_s_floatdp1"},
+ {"001110"_b, "frintx_s_floatdp1"},
+ {"010000"_b, "frint32z_s_floatdp1"},
+ {"010010"_b, "frint64z_s_floatdp1"},
+ },
+ },
+
+ { "_rjyrnt",
+ {4},
+ { {"0"_b, "cmpge_p_p_zi"},
+ {"1"_b, "cmpgt_p_p_zi"},
+ },
+ },
+
+ { "_rjysnh",
+ {18, 17, 16, 9, 8, 7, 6},
+ { {"0000000"_b, "fadd_z_p_zs"},
+ {"0010000"_b, "fsub_z_p_zs"},
+ {"0100000"_b, "fmul_z_p_zs"},
+ {"0110000"_b, "fsubr_z_p_zs"},
+ {"1000000"_b, "fmaxnm_z_p_zs"},
+ {"1010000"_b, "fminnm_z_p_zs"},
+ {"1100000"_b, "fmax_z_p_zs"},
+ {"1110000"_b, "fmin_z_p_zs"},
+ },
+ },
+
+ { "_rkqtvs",
+ {23, 22, 13},
+ { {"100"_b, "fmlal_asimdelem_lh"},
+ {"xx1"_b, "smlal_asimdelem_l"},
+ },
+ },
+
+ { "_rkrltp",
+ {17},
+ { {"0"_b, "st3_asisdlso_b3_3b"},
+ },
+ },
+
+ { "_rksxpn",
+ {30, 23, 22, 11, 10},
+ { {"00010"_b, "str_b_ldst_regoff"},
+ {"00110"_b, "ldr_b_ldst_regoff"},
+ {"01010"_b, "str_q_ldst_regoff"},
+ {"01110"_b, "ldr_q_ldst_regoff"},
+ {"10010"_b, "str_h_ldst_regoff"},
+ {"10110"_b, "ldr_h_ldst_regoff"},
+ },
+ },
+
+ { "_rkvyqk",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "movi_asimdimm_l_hl"},
+ {"00x100"_b, "shrn_asimdshf_n"},
+ {"00x101"_b, "rshrn_asimdshf_n"},
+ {"00x110"_b, "sshll_asimdshf_l"},
+ {"010x00"_b, "shrn_asimdshf_n"},
+ {"010x01"_b, "rshrn_asimdshf_n"},
+ {"010x10"_b, "sshll_asimdshf_l"},
+ {"011100"_b, "shrn_asimdshf_n"},
+ {"011101"_b, "rshrn_asimdshf_n"},
+ {"011110"_b, "sshll_asimdshf_l"},
+ {"0x1000"_b, "shrn_asimdshf_n"},
+ {"0x1001"_b, "rshrn_asimdshf_n"},
+ {"0x1010"_b, "sshll_asimdshf_l"},
+ },
+ },
+
+ { "_rlrjxp",
+ {13, 4},
+ { {"00"_b, "fcmge_p_p_zz"},
+ {"01"_b, "fcmgt_p_p_zz"},
+ {"10"_b, "fcmeq_p_p_zz"},
+ {"11"_b, "fcmne_p_p_zz"},
+ },
+ },
+
+ { "_rlyvpn",
+ {23, 12, 11, 10},
+ { {"0000"_b, "sqshrunb_z_zi"},
+ {"0001"_b, "sqshrunt_z_zi"},
+ {"0010"_b, "sqrshrunb_z_zi"},
+ {"0011"_b, "sqrshrunt_z_zi"},
+ {"0100"_b, "shrnb_z_zi"},
+ {"0101"_b, "shrnt_z_zi"},
+ {"0110"_b, "rshrnb_z_zi"},
+ {"0111"_b, "rshrnt_z_zi"},
+ },
+ },
+
+ { "_rmltms",
+ {9, 8, 7, 6, 5, 1, 0},
+ { {"1111100"_b, "eret_64e_branch_reg"},
+ },
+ },
+
+ { "_rmmmjj",
+ {30, 23, 22},
+ { {"000"_b, "smaddl_64wa_dp_3src"},
+ {"010"_b, "umaddl_64wa_dp_3src"},
+ },
+ },
+
+ { "_rmxjsn",
+ {30},
+ { {"0"_b, "orr_64_log_shift"},
+ {"1"_b, "ands_64_log_shift"},
+ },
+ },
+
+ { "_rnktts",
+ {23, 22},
+ { {"00"_b, "and_asimdsame_only"},
+ {"01"_b, "bic_asimdsame_only"},
+ {"10"_b, "orr_asimdsame_only"},
+ {"11"_b, "orn_asimdsame_only"},
+ },
+ },
+
+ { "_rnqtmt",
+ {30},
+ { {"0"_b, "_zyjjgs"},
+ {"1"_b, "_lrntmz"},
+ },
+ },
+
+ { "_rnrzsj",
+ {20, 18, 17},
+ { {"000"_b, "_lgglzy"},
+ },
+ },
+
+ { "_rnypvh",
+ {17},
+ { {"0"_b, "st1_asisdlsop_bx1_r1b"},
+ {"1"_b, "st1_asisdlsop_b1_i1b"},
+ },
+ },
+
+ { "_rpmrkq",
+ {23},
+ { {"0"_b, "fcmeq_asimdsame_only"},
+ },
+ },
+
+ { "_rpqgjl",
+ {18, 17, 16, 13, 12, 7, 4, 3, 2, 1, 0},
+ { {"00000011111"_b, "_kpxtsp"},
+ },
+ },
+
+ { "_rpzykx",
+ {11},
+ { {"0"_b, "_svvyrz"},
},
},
- { "DecodeSVE101x0111",
- {22, 20},
- { {"00", "VisitSVEContiguousNonTemporalLoad_ScalarPlusImm"},
- {"10", "VisitSVELoadMultipleStructures_ScalarPlusImm"},
+ { "_rqhryp",
+ {12, 10},
+ { {"00"_b, "_kjpxvh"},
+ {"01"_b, "_mxvjxx"},
+ {"10"_b, "sm4ekey_z_zz"},
+ {"11"_b, "rax1_z_zz"},
},
},
- { "DecodeSVE101x1111",
- {22, 20},
- { {"x0", "VisitSVELoadMultipleStructures_ScalarPlusImm"},
+ { "_rshyht",
+ {13},
+ { {"0"_b, "facge_p_p_zz"},
+ {"1"_b, "facgt_p_p_zz"},
},
},
- { "DecodeSVE110x0111",
- {22, 4},
- { {"00", "VisitSVE64BitGatherPrefetch_VectorPlusImm"},
- {"1x", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ { "_rsqmgk",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0000000"_b, "movprfx_z_z"},
},
},
- { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
+ { "_rsyhtj",
+ {13, 12, 11, 10},
+ { {"0001"_b, "ushl_asisdsame_only"},
+ {"0010"_b, "_gxnlxg"},
+ {"0011"_b, "uqshl_asisdsame_only"},
+ {"0101"_b, "urshl_asisdsame_only"},
+ {"0111"_b, "uqrshl_asisdsame_only"},
+ {"1010"_b, "_msnsjp"},
+ {"1110"_b, "_llnzlv"},
+ },
+ },
+
+ { "_rsyjqj",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0010000"_b, "fmaxv_asimdall_only_h"},
+ {"0x00001"_b, "frint64z_asimdmisc_r"},
+ {"1010000"_b, "fminv_asimdall_only_h"},
+ {"1111000"_b, "fabs_asimdmiscfp16_r"},
+ {"1x00000"_b, "fabs_asimdmisc_r"},
+ },
+ },
+
+ { "_rsyzrs",
{22},
- { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ { {"0"_b, "str_64_ldst_regoff"},
+ {"1"_b, "ldr_64_ldst_regoff"},
},
},
- { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
+ { "_rtgkkg",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1101001"_b, "smmla_asimdsame2_g"},
+ {"1101011"_b, "usmmla_asimdsame2_g"},
+ {"x100111"_b, "usdot_asimdsame2_d"},
+ {"xxx0101"_b, "sdot_asimdsame2_d"},
+ },
+ },
+
+ { "_rtlzxv",
+ {13, 12},
+ { {"01"_b, "sqdmull_asisddiff_only"},
+ },
+ },
+
+ { "_rtpztp",
{22},
- { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ { {"0"_b, "umullb_z_zzi_s"},
+ {"1"_b, "umullb_z_zzi_d"},
},
},
- { "DecodeSVE110010xx",
- {23, 4},
- { {"00", "VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets"},
- {"1x", "VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets"},
+ { "_rtrlts",
+ {23, 22, 12, 11, 10},
+ { {"01000"_b, "bfdot_z_zzz"},
+ {"10000"_b, "fmlalb_z_zzz"},
+ {"10001"_b, "fmlalt_z_zzz"},
+ {"11000"_b, "bfmlalb_z_zzz"},
+ {"11001"_b, "bfmlalt_z_zzz"},
},
},
- { "DecodeSVE110011xx",
+ { "_rvjzgt",
{23, 22, 4},
- { {"x0x", "VisitSVE64BitGatherLoad_VectorPlusImm"},
- {"010", "VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets"},
- {"11x", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"},
+ { {"000"_b, "fccmp_s_floatccmp"},
+ {"001"_b, "fccmpe_s_floatccmp"},
+ {"010"_b, "fccmp_d_floatccmp"},
+ {"011"_b, "fccmpe_d_floatccmp"},
+ {"110"_b, "fccmp_h_floatccmp"},
+ {"111"_b, "fccmpe_h_floatccmp"},
},
},
- { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
- {22},
- { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ { "_rvzhhx",
+ {18, 17, 12},
+ { {"000"_b, "st3_asisdlso_d3_3d"},
},
},
- { "UnallocSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets",
- {22},
- { {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets"},
+ { "_rxjrmn",
+ {22, 13, 12},
+ { {"000"_b, "swpa_32_memop"},
+ {"100"_b, "swpal_32_memop"},
},
},
- { "DecodeSVE110111xx",
- {22},
- { {"0", "VisitSVE64BitGatherLoad_VectorPlusImm"},
- {"1", "VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets"},
+ { "_rxpspy",
+ {30, 23, 22, 12, 11, 10},
+ { {"0000xx"_b, "adds_32s_addsub_ext"},
+ {"000100"_b, "adds_32s_addsub_ext"},
+ {"1000xx"_b, "subs_32s_addsub_ext"},
+ {"100100"_b, "subs_32s_addsub_ext"},
},
},
- { "DecodeSVE111x0011",
- {22},
- { {"0", "VisitSVEContiguousNonTemporalStore_ScalarPlusScalar"},
- {"1", "VisitSVEStoreMultipleStructures_ScalarPlusScalar"},
+ { "_ryglvl",
+ {4},
+ { {"0"_b, "ccmp_32_condcmp_reg"},
},
},
- { "DecodeSVE111x01x0",
- {22},
- { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets"},
- {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets"},
+ { "_rykykh",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "rev64_asimdmisc_r"},
},
},
- { "DecodeSVE111x0101",
+ { "_rzkmny",
+ {30},
+ { {"0"_b, "and_64_log_shift"},
+ {"1"_b, "eor_64_log_shift"},
+ },
+ },
+
+ { "_rznrqt",
{22},
- { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets"},
- {"1", "VisitSVE64BitScatterStore_VectorPlusImm"},
+ { {"0"_b, "umullt_z_zzi_s"},
+ {"1"_b, "umullt_z_zzi_d"},
},
},
- { "DecodeSVE111x0111",
- {22, 20},
- { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"},
- {"01", "VisitSVEContiguousNonTemporalStore_ScalarPlusImm"},
- {"11", "VisitSVEStoreMultipleStructures_ScalarPlusImm"},
+ { "_rzqzlq",
+ {23, 22, 20, 19, 16, 13, 12},
+ { {"0111110"_b, "fcvtns_asisdmiscfp16_r"},
+ {"0111111"_b, "fcvtms_asisdmiscfp16_r"},
+ {"0x00110"_b, "fcvtns_asisdmisc_r"},
+ {"0x00111"_b, "fcvtms_asisdmisc_r"},
+ {"1111110"_b, "fcvtps_asisdmiscfp16_r"},
+ {"1111111"_b, "fcvtzs_asisdmiscfp16_r"},
+ {"1x00110"_b, "fcvtps_asisdmisc_r"},
+ {"1x00111"_b, "fcvtzs_asisdmisc_r"},
+ {"xx00000"_b, "cmgt_asisdmisc_z"},
+ {"xx00001"_b, "cmeq_asisdmisc_z"},
+ {"xx00010"_b, "cmlt_asisdmisc_z"},
+ {"xx00011"_b, "abs_asisdmisc_r"},
+ {"xx10111"_b, "addp_asisdpair_only"},
},
},
- { "DecodeSVE111x11x0",
- {22},
- { {"0", "VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets"},
- {"1", "VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets"},
+ { "_rztvnl",
+ {20, 19, 18, 17, 16},
+ { {"0000x"_b, "fcadd_z_p_zz"},
+ {"10000"_b, "faddp_z_p_zz"},
+ {"10100"_b, "fmaxnmp_z_p_zz"},
+ {"10101"_b, "fminnmp_z_p_zz"},
+ {"10110"_b, "fmaxp_z_p_zz"},
+ {"10111"_b, "fminp_z_p_zz"},
},
},
- { "DecodeSVE111x1101",
- {22},
- { {"0", "VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets"},
- {"1", "VisitSVE32BitScatterStore_VectorPlusImm"},
+ { "_rzzxsn",
+ {30, 13},
+ { {"00"_b, "_nvyxmh"},
+ {"01"_b, "_hykhmt"},
+ {"10"_b, "_yszjsm"},
+ {"11"_b, "_jrnxzh"},
+ },
+ },
+
+ { "_sghgtk",
+ {4},
+ { {"0"_b, "cmplo_p_p_zi"},
+ {"1"_b, "cmpls_p_p_zi"},
+ },
+ },
+
+ { "_sgnknz",
+ {23, 22, 20, 19, 11},
+ { {"00011"_b, "fcvtzs_asisdshf_c"},
+ {"001x1"_b, "fcvtzs_asisdshf_c"},
+ {"01xx1"_b, "fcvtzs_asisdshf_c"},
+ },
+ },
+
+ { "_sgztlj",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0010000"_b, "fmaxnmv_asimdall_only_h"},
+ {"0111001"_b, "fcvtas_asimdmiscfp16_r"},
+ {"0x00001"_b, "fcvtas_asimdmisc_r"},
+ {"1010000"_b, "fminnmv_asimdall_only_h"},
+ {"1111000"_b, "fcmgt_asimdmiscfp16_fz"},
+ {"1x00000"_b, "fcmgt_asimdmisc_fz"},
+ {"1x00001"_b, "urecpe_asimdmisc_r"},
+ },
+ },
+
+ { "_shgkvq",
+ {18, 17},
+ { {"00"_b, "st2_asisdlso_s2_2s"},
+ },
+ },
+
+ { "_shqygv",
+ {30, 4},
+ { {"00"_b, "_thvxym"},
+ {"01"_b, "_mrhtxt"},
+ {"10"_b, "_ptjyqx"},
+ {"11"_b, "_rshyht"},
},
},
- { "DecodeSVE111x1111",
- {22, 20},
- { {"x0", "VisitSVEContiguousStore_ScalarPlusImm"},
- {"x1", "VisitSVEStoreMultipleStructures_ScalarPlusImm"},
+ { "_shrsxr",
+ {30, 23, 22},
+ { {"000"_b, "stnp_64_ldstnapair_offs"},
+ {"001"_b, "ldnp_64_ldstnapair_offs"},
+ {"010"_b, "stp_64_ldstpair_post"},
+ {"011"_b, "ldp_64_ldstpair_post"},
},
},
- { "UnallocSVEStorePredicateRegister",
+ { "_shzysp",
+ {30, 23, 22, 19, 18, 17, 16},
+ { {"1001000"_b, "ins_asimdins_ir_r"},
+ {"100x100"_b, "ins_asimdins_ir_r"},
+ {"100xx10"_b, "ins_asimdins_ir_r"},
+ {"100xxx1"_b, "ins_asimdins_ir_r"},
+ {"x01xxxx"_b, "fmulx_asimdsamefp16_only"},
+ },
+ },
+
+ { "_sjlpxn",
+ {23, 22},
+ { {"01"_b, "fcmla_asimdelem_c_h"},
+ {"10"_b, "fcmla_asimdelem_c_s"},
+ },
+ },
+
+ { "_sjlrxn",
+ {10},
+ { {"0"_b, "_mpzqxm"},
+ },
+ },
+
+ { "_sjnqvx",
{23, 22, 4},
- { {"100", "VisitSVEStorePredicateRegister"},
+ { {"000"_b, "fccmp_s_floatccmp"},
+ {"001"_b, "fccmpe_s_floatccmp"},
+ {"010"_b, "fccmp_d_floatccmp"},
+ {"011"_b, "fccmpe_d_floatccmp"},
+ {"110"_b, "fccmp_h_floatccmp"},
+ {"111"_b, "fccmpe_h_floatccmp"},
+ },
+ },
+
+ { "_sjnspg",
+ {4},
+ { {"0"_b, "nors_p_p_pp_z"},
+ {"1"_b, "nands_p_p_pp_z"},
},
},
- { "DecodeSVE1111x010",
+ { "_sjnxky",
+ {30},
+ { {"1"_b, "_ylyskq"},
+ },
+ },
+
+ { "_sjrqth",
{23, 22},
- { {"0x", "VisitSVEContiguousStore_ScalarPlusScalar"},
- {"10", "VisitSVEStoreVectorRegister"},
- {"11", "VisitSVEContiguousStore_ScalarPlusScalar"},
+ { {"00"_b, "fmov_s_floatimm"},
+ {"01"_b, "fmov_d_floatimm"},
+ {"11"_b, "fmov_h_floatimm"},
},
},
- { "DecodeNEONScalarAnd3SHA",
- {29, 23, 22, 15, 14, 11, 10},
- { {"0xx0x00", "VisitCrypto3RegSHA"},
- {"x000xx1", "UnallocNEONScalarCopy"},
- {"xxx1xx1", "UnallocNEONScalar3SameExtra"},
- {"xx100x1", "UnallocNEONScalar3SameFP16"},
+ { "_sjsltg",
+ {17},
+ { {"0"_b, "st2_asisdlsop_hx2_r2h"},
+ {"1"_b, "st2_asisdlsop_h2_i2h"},
},
},
- { "DecodeNEONScalarAnd2SHA",
- {29, 20, 19, 18, 17, 11, 10},
- { {"0010010", "VisitCrypto2RegSHA"},
- {"x000010", "UnallocNEONScalar2RegMisc"},
- {"x100010", "UnallocNEONScalarPairwise"},
- {"x110010", "UnallocNEONScalar2RegMiscFP16"},
- {"xxxxxx1", "UnallocNEONScalar3Same"},
- {"xxxxx00", "UnallocNEONScalar3Diff"},
+ { "_sjtrhm",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld1rqb_z_p_bi_u8"},
+ {"000x0"_b, "ld1rqb_z_p_br_contiguous"},
+ {"01001"_b, "ld1rqh_z_p_bi_u16"},
+ {"010x0"_b, "ld1rqh_z_p_br_contiguous"},
+ {"100x1"_b, "stnt1b_z_p_ar_d_64_unscaled"},
+ {"101x1"_b, "stnt1b_z_p_ar_s_x32_unscaled"},
+ {"110x1"_b, "stnt1h_z_p_ar_d_64_unscaled"},
+ {"111x1"_b, "stnt1h_z_p_ar_s_x32_unscaled"},
},
},
- { "DecodeNEONScalar",
- {28, 23, 10},
- { {"101", "UnallocNEONScalarShiftImmediate"},
- {"1x0", "UnallocNEONScalarByIndexedElement"},
+ { "_sjvhlq",
+ {22},
+ { {"0"_b, "smullb_z_zzi_s"},
+ {"1"_b, "smullb_z_zzi_d"},
},
},
- { "DecodeNEONLoadStoreMulti",
- {20, 19, 18, 17, 16},
- { {"00000", "UnallocNEONLoadStoreMultiStruct"},
+ { "_sjzsvv",
+ {30, 23, 13, 12, 11, 10},
+ { {"101001"_b, "ucvtf_asisdshf_c"},
+ {"101111"_b, "fcvtzu_asisdshf_c"},
+ {"1x01x0"_b, "sqrdmlah_asisdelem_r"},
+ {"1x11x0"_b, "sqrdmlsh_asisdelem_r"},
+ },
+ },
+
+ { "_skglrt",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "mvni_asimdimm_l_sl"},
+ {"00x100"_b, "ushr_asimdshf_r"},
+ {"00x110"_b, "urshr_asimdshf_r"},
+ {"010x00"_b, "ushr_asimdshf_r"},
+ {"010x10"_b, "urshr_asimdshf_r"},
+ {"011100"_b, "ushr_asimdshf_r"},
+ {"011110"_b, "urshr_asimdshf_r"},
+ {"0x1000"_b, "ushr_asimdshf_r"},
+ {"0x1010"_b, "urshr_asimdshf_r"},
+ },
+ },
+
+ { "_skpjrp",
+ {23, 22, 12},
+ { {"000"_b, "_xzyylk"},
+ {"001"_b, "_hpgqlp"},
+ {"010"_b, "_qnsxkj"},
+ {"011"_b, "_nnlvqz"},
+ {"110"_b, "_vylhvl"},
+ {"111"_b, "_stgkpy"},
+ },
+ },
+
+ { "_slhpgp",
+ {23},
+ { {"0"_b, "facge_asimdsame_only"},
+ {"1"_b, "facgt_asimdsame_only"},
+ },
+ },
+
+ { "_sllkpt",
+ {13, 12},
+ { {"10"_b, "lsrv_32_dp_2src"},
+ },
+ },
+
+ { "_slnkst",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "fcvtmu_asimdmiscfp16_r"},
+ {"0x00001"_b, "fcvtmu_asimdmisc_r"},
+ {"1111001"_b, "fcvtzu_asimdmiscfp16_r"},
+ {"1x00001"_b, "fcvtzu_asimdmisc_r"},
+ {"xx00000"_b, "neg_asimdmisc_r"},
+ },
+ },
+
+ { "_sltqpy",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"000xx10"_b, "strb_32b_ldst_regoff"},
+ {"001xx10"_b, "ldrb_32b_ldst_regoff"},
+ {"0100000"_b, "ldaprb_32l_memop"},
+ {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
+ {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
+ {"100xx10"_b, "strh_32_ldst_regoff"},
+ {"101xx10"_b, "ldrh_32_ldst_regoff"},
+ {"1100000"_b, "ldaprh_32l_memop"},
+ {"110xx10"_b, "ldrsh_64_ldst_regoff"},
+ {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+ },
+ },
+
+ { "_smplhv",
+ {10},
+ { {"0"_b, "braa_64p_branch_reg"},
+ {"1"_b, "brab_64p_branch_reg"},
+ },
+ },
+
+ { "_smqvrs",
+ {18, 17},
+ { {"00"_b, "st1_asisdlse_r1_1v"},
+ },
+ },
+
+ { "_smrtxq",
+ {13, 12},
+ { {"00"_b, "sbcs_32_addsub_carry"},
+ },
+ },
+
+ { "_snjpvy",
+ {23, 22, 13, 12, 11, 10},
+ { {"0001x0"_b, "fmulx_asimdelem_rh_h"},
+ {"0x0001"_b, "sqshrun_asimdshf_n"},
+ {"0x0011"_b, "sqrshrun_asimdshf_n"},
+ {"0x0101"_b, "uqshrn_asimdshf_n"},
+ {"0x0111"_b, "uqrshrn_asimdshf_n"},
+ {"0x1001"_b, "ushll_asimdshf_l"},
+ {"1000x0"_b, "fmlal2_asimdelem_lh"},
+ {"1x01x0"_b, "fmulx_asimdelem_r_sd"},
+ {"xx10x0"_b, "umull_asimdelem_l"},
+ },
+ },
+
+ { "_snkqvp",
+ {23, 22, 20, 19, 18, 17, 16, 13, 12, 11},
+ { {"0011111001"_b, "_gkpvxz"},
+ },
+ },
+
+ { "_sntyqy",
+ {4},
+ { {"0"_b, "cmphs_p_p_zi"},
+ {"1"_b, "cmphi_p_p_zi"},
+ },
+ },
+
+ { "_sntzjg",
+ {23, 22, 11, 10},
+ { {"0000"_b, "_qssyls"},
+ {"0001"_b, "stg_64spost_ldsttags"},
+ {"0010"_b, "stg_64soffset_ldsttags"},
+ {"0011"_b, "stg_64spre_ldsttags"},
+ {"0100"_b, "ldg_64loffset_ldsttags"},
+ {"0101"_b, "stzg_64spost_ldsttags"},
+ {"0110"_b, "stzg_64soffset_ldsttags"},
+ {"0111"_b, "stzg_64spre_ldsttags"},
+ {"1000"_b, "_kyxqgg"},
+ {"1001"_b, "st2g_64spost_ldsttags"},
+ {"1010"_b, "st2g_64soffset_ldsttags"},
+ {"1011"_b, "st2g_64spre_ldsttags"},
+ {"1100"_b, "_stjrgx"},
+ {"1101"_b, "stz2g_64spost_ldsttags"},
+ {"1110"_b, "stz2g_64soffset_ldsttags"},
+ {"1111"_b, "stz2g_64spre_ldsttags"},
+ },
+ },
+
+ { "_spglxn",
+ {4, 3, 2, 1, 0},
+ { {"11111"_b, "_yqmvxk"},
+ },
+ },
+
+ { "_sphpkr",
+ {4, 3, 2, 1, 0},
+ { {"11111"_b, "_thsxvg"},
+ },
+ },
+
+ { "_spjjkg",
+ {23, 22, 13, 12, 11, 10},
+ { {"0011x0"_b, "sudot_asimdelem_d"},
+ {"0111x0"_b, "bfdot_asimdelem_e"},
+ {"0x1001"_b, "scvtf_asimdshf_c"},
+ {"0x1111"_b, "fcvtzs_asimdshf_c"},
+ {"1011x0"_b, "usdot_asimdelem_d"},
+ {"1111x0"_b, "bfmlal_asimdelem_f"},
+ {"xx00x0"_b, "sqdmulh_asimdelem_r"},
+ {"xx01x0"_b, "sqrdmulh_asimdelem_r"},
+ {"xx10x0"_b, "sdot_asimdelem_d"},
+ },
+ },
+
+ { "_spmkmm",
+ {30, 19, 18, 17, 16, 10},
+ { {"110001"_b, "ins_asimdins_iv_v"},
+ {"1x1001"_b, "ins_asimdins_iv_v"},
+ {"1xx101"_b, "ins_asimdins_iv_v"},
+ {"1xxx11"_b, "ins_asimdins_iv_v"},
+ {"xxxxx0"_b, "ext_asimdext_only"},
+ },
+ },
+
+ { "_spzgkt",
+ {23, 22, 13, 12, 11, 10},
+ { {"0x1001"_b, "ucvtf_asimdshf_c"},
+ {"0x1111"_b, "fcvtzu_asimdshf_c"},
+ {"1000x0"_b, "fmlsl2_asimdelem_lh"},
+ {"xx01x0"_b, "sqrdmlah_asimdelem_r"},
+ {"xx10x0"_b, "udot_asimdelem_d"},
+ {"xx11x0"_b, "sqrdmlsh_asimdelem_r"},
+ },
+ },
+
+ { "_sqgjmn",
+ {20, 9},
+ { {"00"_b, "_mxgykv"},
+ },
+ },
+
+ { "_sqgxzn",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "paciza_64z_dp_1src"},
+ },
+ },
+
+ { "_sqjpsl",
+ {30, 13, 12, 11, 10},
+ { {"10001"_b, "sqrdmlah_asisdsame2_only"},
+ {"10011"_b, "sqrdmlsh_asisdsame2_only"},
+ },
+ },
+
+ { "_sqpjtr",
+ {20, 18, 17},
+ { {"000"_b, "_nllnsg"},
+ },
+ },
+
+ { "_srggzy",
+ {19},
+ { {"0"_b, "_xqgxjp"},
+ {"1"_b, "sysl_rc_systeminstrs"},
+ },
+ },
+
+ { "_srglgl",
+ {18, 17},
+ { {"0x"_b, "st3_asisdlsop_sx3_r3s"},
+ {"10"_b, "st3_asisdlsop_sx3_r3s"},
+ {"11"_b, "st3_asisdlsop_s3_i3s"},
+ },
+ },
+
+ { "_srmhjk",
+ {30},
+ { {"0"_b, "ldr_s_loadlit"},
+ {"1"_b, "ldr_d_loadlit"},
},
},
- { "DecodeNEONLoadStoreSingle",
+ { "_srmhlk",
{20, 19, 18, 17, 16},
- { {"00000", "UnallocNEONLoadStoreSingleStruct"},
+ { {"00000"_b, "uaddlp_asimdmisc_p"},
+ {"00001"_b, "sqxtun_asimdmisc_n"},
},
},
- { "DecodeNEONOther",
- {29, 23, 22, 15, 14, 11, 10},
- { {"0xx0x00", "UnallocNEONTable"},
- {"0xx0x10", "UnallocNEONPerm"},
- {"1xx0xx0", "UnallocNEONExtract"},
- {"x000xx1", "UnallocNEONCopy"},
- {"xx100x1", "UnallocNEON3SameFP16"},
- {"xxx1xx1", "UnallocNEON3SameExtra"},
+ { "_srvnql",
+ {18, 17, 12},
+ { {"0x0"_b, "ld1_asisdlsop_dx1_r1d"},
+ {"100"_b, "ld1_asisdlsop_dx1_r1d"},
+ {"110"_b, "ld1_asisdlsop_d1_i1d"},
},
},
- { "DecodeNEON2OpAndAcross",
- {30, 29, 20, 19, 18, 17},
- { {"100100", "VisitCryptoAES"},
- {"xx1100", "UnallocNEON2RegMiscFP16"},
- {"xx0000", "UnallocNEON2RegMisc"},
- {"xx1000", "UnallocNEONAcrossLanes"},
+ { "_stgkpy",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_h_floatimm"},
},
},
- { "DecodeNEON3Op",
- {11, 10},
- { {"00", "UnallocNEON3Different"},
- {"10", "DecodeNEON2OpAndAcross"},
- {"x1", "UnallocNEON3Same"},
+ { "_stjrgx",
+ {20, 19, 18, 17, 16, 13, 12},
+ { {"0000000"_b, "ldgm_64bulk_ldsttags"},
},
},
- { "DecodeNEONImmAndIndex",
- {23, 22, 21, 20, 19, 10},
- { {"000001", "UnallocNEONModifiedImmediate"},
- {"0xxx11", "UnallocNEONShiftImmediate"},
- {"0xx1x1", "UnallocNEONShiftImmediate"},
- {"0x1xx1", "UnallocNEONShiftImmediate"},
- {"01xxx1", "UnallocNEONShiftImmediate"},
- {"xxxxx0", "UnallocNEONByIndexedElement"},
+ { "_stqmps",
+ {12},
+ { {"0"_b, "ld3_asisdlsop_dx3_r3d"},
},
},
- { "DecodeFP",
- {15, 14, 13, 12, 11, 10},
- { {"000000", "UnallocFPIntegerConvert"},
- {"x10000", "UnallocFPDataProcessing1Source"},
- {"xx1000", "UnallocFPCompare"},
- {"xxx100", "UnallocFPImmediate"},
- {"xxxx01", "UnallocFPConditionalCompare"},
- {"xxxx10", "UnallocFPDataProcessing2Source"},
- {"xxxx11", "UnallocFPConditionalSelect"},
+ { "_strkph",
+ {23, 22},
+ { {"00"_b, "tbl_asimdtbl_l2_2"},
},
},
- { "DecodeLoadStore",
- {11, 10},
- { {"00", "UnallocLoadStoreUnscaledOffset"},
- {"01", "UnallocLoadStorePostIndex"},
- {"10", "VisitUnimplemented"}, // LoadStoreUnprivileged.
- {"11", "UnallocLoadStorePreIndex"},
+ { "_svnyyx",
+ {13, 12},
+ { {"00"_b, "adcs_32_addsub_carry"},
},
},
- { "DecodeLoadStoreRegister",
- {11, 10},
- { {"00", "UnallocAtomicMemory"},
- {"10", "UnallocLoadStoreRegisterOffset"},
- {"x1", "UnallocLoadStorePAC"},
+ { "_svrnxq",
+ {12},
+ { {"0"_b, "st3_asisdlsop_dx3_r3d"},
},
},
- { "DecodeCondCmp",
- {11},
- { {"0", "UnallocConditionalCompareRegister"},
- {"1", "UnallocConditionalCompareImmediate"},
+ { "_svvyrz",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"00xxxxx"_b, "addvl_r_ri"},
+ {"01xxxxx"_b, "addpl_r_ri"},
+ {"1011111"_b, "rdvl_r_i"},
+ },
+ },
+
+ { "_sxnkrh",
+ {23},
+ { {"1"_b, "_xxkvsy"},
+ },
+ },
+
+ { "_sxpvym",
+ {30, 23, 22, 13},
+ { {"0000"_b, "ldnt1sb_z_p_ar_s_x32_unscaled"},
+ {"0001"_b, "ldnt1b_z_p_ar_s_x32_unscaled"},
+ {"0010"_b, "ld1rb_z_p_bi_u8"},
+ {"0011"_b, "ld1rb_z_p_bi_u16"},
+ {"0100"_b, "ldnt1sh_z_p_ar_s_x32_unscaled"},
+ {"0101"_b, "ldnt1h_z_p_ar_s_x32_unscaled"},
+ {"0110"_b, "ld1rsw_z_p_bi_s64"},
+ {"0111"_b, "ld1rh_z_p_bi_u16"},
+ {"1000"_b, "ldnt1sb_z_p_ar_d_64_unscaled"},
+ {"1010"_b, "ld1sb_z_p_bz_d_64_unscaled"},
+ {"1011"_b, "ldff1sb_z_p_bz_d_64_unscaled"},
+ {"1100"_b, "ldnt1sh_z_p_ar_d_64_unscaled"},
+ {"1110"_b, "ld1sh_z_p_bz_d_64_unscaled"},
+ {"1111"_b, "ldff1sh_z_p_bz_d_64_unscaled"},
+ },
+ },
+
+ { "_syktsg",
+ {13, 12},
+ { {"00"_b, "udiv_64_dp_2src"},
+ {"10"_b, "asrv_64_dp_2src"},
+ },
+ },
+
+ { "_syzjtz",
+ {13, 12, 10},
+ { {"010"_b, "sqrdmlah_asisdelem_r"},
+ {"101"_b, "_jqnglz"},
+ {"110"_b, "sqrdmlsh_asisdelem_r"},
+ {"111"_b, "_zslsvj"},
+ },
+ },
+
+ { "_szttjy",
+ {30, 23, 22, 19, 18, 17, 16},
+ { {"00000x1"_b, "umov_asimdins_w_w"},
+ {"0000x10"_b, "umov_asimdins_w_w"},
+ {"00010xx"_b, "umov_asimdins_w_w"},
+ {"0001110"_b, "umov_asimdins_w_w"},
+ {"000x10x"_b, "umov_asimdins_w_w"},
+ {"000x111"_b, "umov_asimdins_w_w"},
+ {"1001000"_b, "umov_asimdins_x_x"},
+ {"x01xxxx"_b, "frecps_asimdsamefp16_only"},
+ {"x11xxxx"_b, "frsqrts_asimdsamefp16_only"},
+ },
+ },
+
+ { "_tgmljr",
+ {23, 22, 20, 19, 12, 11},
+ { {"000000"_b, "movi_asimdimm_n_b"},
+ {"000010"_b, "fmov_asimdimm_s_s"},
+ {"000011"_b, "fmov_asimdimm_h_h"},
+ {"00x100"_b, "scvtf_asimdshf_c"},
+ {"00x111"_b, "fcvtzs_asimdshf_c"},
+ {"010x00"_b, "scvtf_asimdshf_c"},
+ {"010x11"_b, "fcvtzs_asimdshf_c"},
+ {"011100"_b, "scvtf_asimdshf_c"},
+ {"011111"_b, "fcvtzs_asimdshf_c"},
+ {"0x1000"_b, "scvtf_asimdshf_c"},
+ {"0x1011"_b, "fcvtzs_asimdshf_c"},
+ },
+ },
+
+ { "_tgqsyg",
+ {22},
+ { {"0"_b, "prfm_p_ldst_regoff"},
+ },
+ },
+
+ { "_thqvrp",
+ {17},
+ { {"0"_b, "st1_asisdlsep_r2_r2"},
+ {"1"_b, "st1_asisdlsep_i2_i2"},
+ },
+ },
+
+ { "_thrxph",
+ {23, 22, 10},
+ { {"100"_b, "umlalb_z_zzzi_s"},
+ {"101"_b, "umlalt_z_zzzi_s"},
+ {"110"_b, "umlalb_z_zzzi_d"},
+ {"111"_b, "umlalt_z_zzzi_d"},
+ },
+ },
+
+ { "_thsxvg",
+ {11, 10, 9, 8, 7, 6},
+ { {"000010"_b, "ssbb_only_barriers"},
+ {"010010"_b, "pssbb_only_barriers"},
+ {"0x1010"_b, "dsb_bo_barriers"},
+ {"0xx110"_b, "dsb_bo_barriers"},
+ {"1xxx10"_b, "dsb_bo_barriers"},
+ {"xxxx01"_b, "clrex_bn_barriers"},
+ {"xxxx11"_b, "isb_bi_barriers"},
+ },
+ },
+
+ { "_thvvzp",
+ {18, 17, 12},
+ { {"0x0"_b, "st1_asisdlsop_dx1_r1d"},
+ {"100"_b, "st1_asisdlsop_dx1_r1d"},
+ {"110"_b, "st1_asisdlsop_d1_i1d"},
+ },
+ },
+
+ { "_thvxym",
+ {20},
+ { {"0"_b, "_prkmty"},
+ {"1"_b, "_pjgkjs"},
+ },
+ },
+
+ { "_tjktkm",
+ {30},
+ { {"1"_b, "_gntpyh"},
+ },
+ },
+
+ { "_tjltls",
+ {18, 17},
+ { {"0x"_b, "st1_asisdlsep_r1_r1"},
+ {"10"_b, "st1_asisdlsep_r1_r1"},
+ {"11"_b, "st1_asisdlsep_i1_i1"},
+ },
+ },
+
+ { "_tjpjng",
+ {23, 22, 13, 12},
+ { {"0000"_b, "fmax_s_floatdp2"},
+ {"0001"_b, "fmin_s_floatdp2"},
+ {"0010"_b, "fmaxnm_s_floatdp2"},
+ {"0011"_b, "fminnm_s_floatdp2"},
+ {"0100"_b, "fmax_d_floatdp2"},
+ {"0101"_b, "fmin_d_floatdp2"},
+ {"0110"_b, "fmaxnm_d_floatdp2"},
+ {"0111"_b, "fminnm_d_floatdp2"},
+ {"1100"_b, "fmax_h_floatdp2"},
+ {"1101"_b, "fmin_h_floatdp2"},
+ {"1110"_b, "fmaxnm_h_floatdp2"},
+ {"1111"_b, "fminnm_h_floatdp2"},
+ },
+ },
+
+ { "_tjtgjy",
+ {20, 19, 18, 17},
+ { {"0000"_b, "_gjsnly"},
+ },
+ },
+
+ { "_tjzqnp",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ldnt1b_z_p_bi_contiguous"},
+ {"000x0"_b, "ldnt1b_z_p_br_contiguous"},
+ {"00101"_b, "ld3b_z_p_bi_contiguous"},
+ {"001x0"_b, "ld3b_z_p_br_contiguous"},
+ {"01001"_b, "ldnt1h_z_p_bi_contiguous"},
+ {"010x0"_b, "ldnt1h_z_p_br_contiguous"},
+ {"01101"_b, "ld3h_z_p_bi_contiguous"},
+ {"011x0"_b, "ld3h_z_p_br_contiguous"},
+ {"10011"_b, "stnt1b_z_p_bi_contiguous"},
+ {"100x0"_b, "st1b_z_p_bz_d_x32_unscaled"},
+ {"10111"_b, "st3b_z_p_bi_contiguous"},
+ {"101x0"_b, "st1b_z_p_bz_s_x32_unscaled"},
+ {"10x01"_b, "st1b_z_p_bi"},
+ {"11011"_b, "stnt1h_z_p_bi_contiguous"},
+ {"110x0"_b, "st1h_z_p_bz_d_x32_unscaled"},
+ {"11111"_b, "st3h_z_p_bi_contiguous"},
+ {"111x0"_b, "st1h_z_p_bz_s_x32_unscaled"},
+ {"11x01"_b, "st1h_z_p_bi"},
+ },
+ },
+
+ { "_tkjtgp",
+ {30},
+ { {"0"_b, "_sqgjmn"},
+ {"1"_b, "_ztpryr"},
+ },
+ },
+
+ { "_tkzqqp",
+ {4, 3, 2, 1, 0},
+ { {"11111"_b, "_ntkqhk"},
+ },
+ },
+
+ { "_tlstgz",
+ {30, 23, 22},
+ { {"000"_b, "stlxp_sp32_ldstexcl"},
+ {"001"_b, "ldaxp_lp32_ldstexcl"},
+ {"100"_b, "stlxp_sp64_ldstexcl"},
+ {"101"_b, "ldaxp_lp64_ldstexcl"},
+ },
+ },
+
+ { "_tlzlrj",
+ {17},
+ { {"0"_b, "st2_asisdlso_b2_2b"},
+ },
+ },
+
+ { "_tmhlvh",
+ {20, 9, 4},
+ { {"000"_b, "zip2_p_pp"},
+ },
+ },
+
+ { "_tmrnzq",
+ {17},
+ { {"0"_b, "st2_asisdlsep_r2_r"},
+ {"1"_b, "st2_asisdlsep_i2_i"},
+ },
+ },
+
+ { "_tmshps",
+ {17},
+ { {"0"_b, "fmaxnmv_v_p_z"},
+ {"1"_b, "fmaxv_v_p_z"},
+ },
+ },
+
+ { "_tmthqm",
+ {22},
+ { {"0"_b, "str_32_ldst_regoff"},
+ {"1"_b, "ldr_32_ldst_regoff"},
+ },
+ },
+
+ { "_tmtnkq",
+ {23, 18, 17, 16},
+ { {"0000"_b, "uqxtnb_z_zz"},
+ },
+ },
+
+ { "_tnhmpx",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1011001"_b, "fcmge_asisdsamefp16_only"},
+ {"1011011"_b, "facge_asisdsamefp16_only"},
+ {"1110101"_b, "fabd_asisdsamefp16_only"},
+ {"1111001"_b, "fcmgt_asisdsamefp16_only"},
+ {"1111011"_b, "facgt_asisdsamefp16_only"},
+ },
+ },
+
+ { "_tnrrjk",
+ {30, 23, 22, 11, 10},
+ { {"01000"_b, "csel_32_condsel"},
+ {"01001"_b, "csinc_32_condsel"},
+ {"11000"_b, "csinv_32_condsel"},
+ {"11001"_b, "csneg_32_condsel"},
+ },
+ },
+
+ { "_tnxlnl",
+ {13, 12},
+ { {"00"_b, "crc32x_64c_dp_2src"},
+ {"01"_b, "crc32cx_64c_dp_2src"},
+ },
+ },
+
+ { "_tnzytv",
+ {11, 10, 9, 8, 7, 6},
+ { {"000000"_b, "wfet_only_systeminstrswithreg"},
+ },
+ },
+
+ { "_tpkslq",
+ {30, 23, 22, 20, 13, 4},
+ { {"00001x"_b, "ld1rqw_z_p_bi_u32"},
+ {"000x0x"_b, "ld1rqw_z_p_br_contiguous"},
+ {"01001x"_b, "ld1rqd_z_p_bi_u64"},
+ {"010x0x"_b, "ld1rqd_z_p_br_contiguous"},
+ {"100x1x"_b, "stnt1w_z_p_ar_d_64_unscaled"},
+ {"101x1x"_b, "stnt1w_z_p_ar_s_x32_unscaled"},
+ {"110x00"_b, "str_p_bi"},
+ {"110x1x"_b, "stnt1d_z_p_ar_d_64_unscaled"},
+ },
+ },
+
+ { "_tpkzxg",
+ {4},
+ { {"0"_b, "ccmp_64_condcmp_imm"},
+ },
+ },
+
+ { "_tpsylx",
+ {13},
+ { {"0"_b, "_gjylrt"},
+ {"1"_b, "_ygjslq"},
+ },
+ },
+
+ { "_trlhgn",
+ {30, 23, 22, 11, 10},
+ { {"00010"_b, "str_b_ldst_regoff"},
+ {"00110"_b, "ldr_b_ldst_regoff"},
+ {"01010"_b, "str_q_ldst_regoff"},
+ {"01110"_b, "ldr_q_ldst_regoff"},
+ {"10010"_b, "str_h_ldst_regoff"},
+ {"10110"_b, "ldr_h_ldst_regoff"},
},
},
- // Unallocation decode nodes. These are used to mark encodings within an
- // instruction class as unallocated.
- { "UnallocAddSubExtended",
+ { "_tsksxr",
+ {17},
+ { {"0"_b, "fminnmv_v_p_z"},
+ {"1"_b, "fminv_v_p_z"},
+ },
+ },
+
+ { "_tssqsr",
+ {30},
+ { {"1"_b, "_syzjtz"},
+ },
+ },
+
+ { "_tsvsgh",
+ {17},
+ { {"0"_b, "st1_asisdlso_b1_1b"},
+ },
+ },
+
+ { "_tszvvk",
+ {18, 17, 12},
+ { {"000"_b, "ld2_asisdlso_d2_2d"},
+ },
+ },
+
+ { "_ttplgp",
{12, 11, 10},
- { {"1x1", "VisitUnallocated"},
- {"11x", "VisitUnallocated"},
- {"otherwise", "UnallocAddSubExtended_2"},
+ { {"000"_b, "sqincp_z_p_z"},
+ {"010"_b, "sqincp_r_p_r_sx"},
+ {"011"_b, "sqincp_r_p_r_x"},
+ {"100"_b, "_zqmrhp"},
},
},
- { "UnallocAddSubExtended_2",
+ { "_ttstyt",
+ {12, 10},
+ { {"00"_b, "_rkqtvs"},
+ {"01"_b, "_mtlhnl"},
+ {"10"_b, "_zlmgyp"},
+ {"11"_b, "_kjghlk"},
+ },
+ },
+
+ { "_tvgvvq",
+ {30},
+ { {"0"_b, "cbnz_32_compbranch"},
+ },
+ },
+
+ { "_tvsszp",
{23, 22},
- { {"1x", "VisitUnallocated"},
- {"x1", "VisitUnallocated"},
- {"otherwise", "VisitAddSubExtended"},
+ { {"00"_b, "fmadd_s_floatdp3"},
+ {"01"_b, "fmadd_d_floatdp3"},
+ {"11"_b, "fmadd_h_floatdp3"},
},
},
- { "UnallocAddSubImmediate",
- {23},
- { {"0", "VisitAddSubImmediate"},
- {"1", "VisitUnallocated"},
+ { "_txhzxq",
+ {30, 22, 11},
+ { {"000"_b, "_svnyyx"},
+ {"001"_b, "_qsxpyq"},
+ {"010"_b, "_pnqxjg"},
+ {"011"_b, "_myrshl"},
+ {"100"_b, "_smrtxq"},
+ {"110"_b, "_ryglvl"},
+ {"111"_b, "_qqsmlt"},
+ },
+ },
+
+ { "_txjyxr",
+ {18, 17},
+ { {"0x"_b, "ld1_asisdlsep_r1_r1"},
+ {"10"_b, "ld1_asisdlsep_r1_r1"},
+ {"11"_b, "ld1_asisdlsep_i1_i1"},
+ },
+ },
+
+ { "_txnqzy",
+ {30, 23, 22},
+ { {"000"_b, "smsubl_64wa_dp_3src"},
+ {"010"_b, "umsubl_64wa_dp_3src"},
+ },
+ },
+
+ { "_txsmts",
+ {13, 12, 11, 10},
+ { {"0000"_b, "smlal_asimddiff_l"},
+ {"0001"_b, "add_asimdsame_only"},
+ {"0010"_b, "_qhsplz"},
+ {"0011"_b, "cmtst_asimdsame_only"},
+ {"0100"_b, "sqdmlal_asimddiff_l"},
+ {"0101"_b, "mla_asimdsame_only"},
+ {"0110"_b, "_yvxgrr"},
+ {"0111"_b, "mul_asimdsame_only"},
+ {"1000"_b, "smlsl_asimddiff_l"},
+ {"1001"_b, "smaxp_asimdsame_only"},
+ {"1010"_b, "_mnxmst"},
+ {"1011"_b, "sminp_asimdsame_only"},
+ {"1100"_b, "sqdmlsl_asimddiff_l"},
+ {"1101"_b, "sqdmulh_asimdsame_only"},
+ {"1110"_b, "_klkgqk"},
+ {"1111"_b, "addp_asimdsame_only"},
+ },
+ },
+
+ { "_txzxzs",
+ {23, 22, 20, 19, 18},
+ { {"00000"_b, "orr_z_zi"},
+ {"01000"_b, "eor_z_zi"},
+ {"10000"_b, "and_z_zi"},
+ {"11000"_b, "dupm_z_i"},
+ {"xx1xx"_b, "cpy_z_p_i"},
},
},
- { "UnallocAddSubShifted",
+ { "_tyjqvt",
+ {18, 17},
+ { {"00"_b, "ld4_asisdlso_s4_4s"},
+ },
+ },
+
+ { "_tylqpt",
+ {23, 22, 13},
+ { {"000"_b, "fmulx_asimdelem_rh_h"},
+ {"1x0"_b, "fmulx_asimdelem_r_sd"},
+ },
+ },
+
+ { "_typysz",
+ {23, 22, 20, 19, 13, 11, 10},
+ { {"00x1001"_b, "sqshrn_asisdshf_n"},
+ {"00x1011"_b, "sqrshrn_asisdshf_n"},
+ {"00xx0x0"_b, "fmul_asisdelem_rh_h"},
+ {"010x001"_b, "sqshrn_asisdshf_n"},
+ {"010x011"_b, "sqrshrn_asisdshf_n"},
+ {"0111001"_b, "sqshrn_asisdshf_n"},
+ {"0111011"_b, "sqrshrn_asisdshf_n"},
+ {"0x10001"_b, "sqshrn_asisdshf_n"},
+ {"0x10011"_b, "sqrshrn_asisdshf_n"},
+ {"1xxx0x0"_b, "fmul_asisdelem_r_sd"},
+ {"xxxx1x0"_b, "sqdmull_asisdelem_l"},
+ },
+ },
+
+ { "_tytvjk",
+ {13, 12, 11},
+ { {"000"_b, "_lylpyx"},
+ {"001"_b, "_kyxrqg"},
+ {"010"_b, "_zmkqxl"},
+ {"011"_b, "_gngjxr"},
+ {"100"_b, "_mlxtxs"},
+ {"101"_b, "_mnmtql"},
+ {"110"_b, "_xmxpnx"},
+ {"111"_b, "_lkttgy"},
+ },
+ },
+
+ { "_tzzhsk",
+ {13, 12},
+ { {"01"_b, "sqdmlal_asisddiff_only"},
+ {"11"_b, "sqdmlsl_asisddiff_only"},
+ },
+ },
+
+ { "_tzzssm",
+ {12, 11, 10},
+ { {"000"_b, "histseg_z_zz"},
+ },
+ },
+
+ { "_tzzzxz",
+ {30, 23, 22, 20, 19},
+ { {"0xxxx"_b, "bl_only_branch_imm"},
+ {"10001"_b, "sysl_rc_systeminstrs"},
+ {"1001x"_b, "mrs_rs_systemmove"},
+ },
+ },
+
+ { "_vgrhsz",
+ {30, 23, 11, 10},
+ { {"0010"_b, "_hljrqn"},
+ {"0100"_b, "_htnmls"},
+ {"0110"_b, "_vxgzqy"},
+ {"1000"_b, "_lpsxhz"},
+ {"1001"_b, "ldraa_64_ldst_pac"},
+ {"1010"_b, "_jtqlhs"},
+ {"1011"_b, "ldraa_64w_ldst_pac"},
+ {"1100"_b, "_yrlzqp"},
+ {"1101"_b, "ldrab_64_ldst_pac"},
+ {"1110"_b, "_xyhxzt"},
+ {"1111"_b, "ldrab_64w_ldst_pac"},
+ },
+ },
+
+ { "_vgrtjz",
+ {12},
+ { {"0"_b, "sqdmulh_asimdelem_r"},
+ {"1"_b, "sqrdmulh_asimdelem_r"},
+ },
+ },
+
+ { "_vgtnjh",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0001010"_b, "fcvtxnt_z_p_z_d2s"},
+ {"1001000"_b, "fcvtnt_z_p_z_s2h"},
+ {"1001001"_b, "fcvtlt_z_p_z_h2s"},
+ {"1001010"_b, "bfcvtnt_z_p_z_s2bf"},
+ {"1101010"_b, "fcvtnt_z_p_z_d2s"},
+ {"1101011"_b, "fcvtlt_z_p_z_s2d"},
+ },
+ },
+
+ { "_vhhktl",
+ {18, 17},
+ { {"0x"_b, "st4_asisdlsop_sx4_r4s"},
+ {"10"_b, "st4_asisdlsop_sx4_r4s"},
+ {"11"_b, "st4_asisdlsop_s4_i4s"},
+ },
+ },
+
+ { "_vhmsgj",
+ {18, 17, 12},
+ { {"000"_b, "ld1_asisdlso_d1_1d"},
+ },
+ },
+
+ { "_vjlnqj",
+ {23, 22, 13, 12},
+ { {"0000"_b, "fnmul_s_floatdp2"},
+ {"0100"_b, "fnmul_d_floatdp2"},
+ {"1100"_b, "fnmul_h_floatdp2"},
+ },
+ },
+
+ { "_vjmklj",
+ {23, 22},
+ { {"10"_b, "sqrdcmlah_z_zzzi_h"},
+ {"11"_b, "sqrdcmlah_z_zzzi_s"},
+ },
+ },
+
+ { "_vjqsqs",
+ {30},
+ { {"0"_b, "and_32_log_shift"},
+ {"1"_b, "eor_32_log_shift"},
+ },
+ },
+
+ { "_vjxqhp",
+ {23, 22, 20, 19, 18, 16, 13},
+ { {"0000000"_b, "_jlrvpl"},
+ {"0000001"_b, "_pmkxlj"},
+ {"0100000"_b, "_qmgtyq"},
+ {"0100001"_b, "_qhxzxl"},
+ {"100xxx0"_b, "st2_asisdlsep_r2_r"},
+ {"100xxx1"_b, "st1_asisdlsep_r2_r2"},
+ {"1010xx0"_b, "st2_asisdlsep_r2_r"},
+ {"1010xx1"_b, "st1_asisdlsep_r2_r2"},
+ {"10110x0"_b, "st2_asisdlsep_r2_r"},
+ {"10110x1"_b, "st1_asisdlsep_r2_r2"},
+ {"1011100"_b, "st2_asisdlsep_r2_r"},
+ {"1011101"_b, "st1_asisdlsep_r2_r2"},
+ {"1011110"_b, "_tmrnzq"},
+ {"1011111"_b, "_thqvrp"},
+ {"110xxx0"_b, "ld2_asisdlsep_r2_r"},
+ {"110xxx1"_b, "ld1_asisdlsep_r2_r2"},
+ {"1110xx0"_b, "ld2_asisdlsep_r2_r"},
+ {"1110xx1"_b, "ld1_asisdlsep_r2_r2"},
+ {"11110x0"_b, "ld2_asisdlsep_r2_r"},
+ {"11110x1"_b, "ld1_asisdlsep_r2_r2"},
+ {"1111100"_b, "ld2_asisdlsep_r2_r"},
+ {"1111101"_b, "ld1_asisdlsep_r2_r2"},
+ {"1111110"_b, "_nszhhy"},
+ {"1111111"_b, "_qxrzgv"},
+ },
+ },
+
+ { "_vjymzn",
{23, 22},
- { {"11", "VisitUnallocated"},
- {"otherwise", "UnallocAddSubShifted_2"},
+ { {"00"_b, "fcsel_s_floatsel"},
+ {"01"_b, "fcsel_d_floatsel"},
+ {"11"_b, "fcsel_h_floatsel"},
},
},
- { "UnallocAddSubShifted_2",
- {31, 15},
- { {"01", "VisitUnallocated"},
- {"otherwise", "VisitAddSubShifted"},
+ { "_vkhhkk",
+ {30, 23, 22, 11, 10, 4},
+ { {"001000"_b, "ccmn_64_condcmp_reg"},
+ {"001100"_b, "ccmn_64_condcmp_imm"},
+ {"101000"_b, "ccmp_64_condcmp_reg"},
+ {"101100"_b, "ccmp_64_condcmp_imm"},
},
},
- { "UnallocAddSubWithCarry",
- {15, 14, 13, 12, 11, 10},
- { {"000000", "VisitAddSubWithCarry"},
- {"x00001", "UnallocRotateRightIntoFlags"},
- {"xx0010", "UnallocEvaluateIntoFlags"},
- {"otherwise", "VisitUnallocated"},
+ { "_vkrkks",
+ {30, 23, 22, 13, 4},
+ { {"00000"_b, "prfb_i_p_br_s"},
+ {"00010"_b, "prfb_i_p_ai_s"},
+ {"0010x"_b, "ld1rb_z_p_bi_u32"},
+ {"0011x"_b, "ld1rb_z_p_bi_u64"},
+ {"01000"_b, "prfh_i_p_br_s"},
+ {"01010"_b, "prfh_i_p_ai_s"},
+ {"0110x"_b, "ld1rh_z_p_bi_u32"},
+ {"0111x"_b, "ld1rh_z_p_bi_u64"},
+ {"1000x"_b, "ldnt1b_z_p_ar_d_64_unscaled"},
+ {"10010"_b, "prfb_i_p_ai_d"},
+ {"1010x"_b, "ld1b_z_p_bz_d_64_unscaled"},
+ {"1011x"_b, "ldff1b_z_p_bz_d_64_unscaled"},
+ {"1100x"_b, "ldnt1h_z_p_ar_d_64_unscaled"},
+ {"11010"_b, "prfh_i_p_ai_d"},
+ {"1110x"_b, "ld1h_z_p_bz_d_64_unscaled"},
+ {"1111x"_b, "ldff1h_z_p_bz_d_64_unscaled"},
},
},
- { "UnallocAtomicMemory",
- {26, 23, 22, 15, 14, 13, 12},
- { {"0xx1001", "VisitUnallocated"},
- {"0xx101x", "VisitUnallocated"},
- {"0xx1101", "VisitUnallocated"},
- {"0xx111x", "VisitUnallocated"},
- {"00x1100", "VisitUnallocated"},
- {"0111100", "VisitUnallocated"},
- {"1xxxxxx", "VisitUnallocated"},
- {"otherwise", "VisitAtomicMemory"},
+ { "_vkvgnm",
+ {30, 13},
+ { {"10"_b, "_vyygqs"},
},
},
- { "UnallocBitfield",
- {31, 30, 29, 22},
- { {"x11x", "VisitUnallocated"},
- {"0xx1", "VisitUnallocated"},
- {"1xx0", "VisitUnallocated"},
- {"otherwise", "VisitBitfield"},
+ { "_vkyngx",
+ {23, 22, 19, 18, 17, 16},
+ { {"0000x1"_b, "dup_asimdins_dv_v"},
+ {"000x10"_b, "dup_asimdins_dv_v"},
+ {"0010xx"_b, "dup_asimdins_dv_v"},
+ {"001110"_b, "dup_asimdins_dv_v"},
+ {"00x10x"_b, "dup_asimdins_dv_v"},
+ {"00x111"_b, "dup_asimdins_dv_v"},
+ {"01xxxx"_b, "fmaxnm_asimdsamefp16_only"},
+ {"11xxxx"_b, "fminnm_asimdsamefp16_only"},
},
},
- { "UnallocConditionalBranch",
- {24, 4},
- { {"00", "VisitConditionalBranch"},
- {"otherwise", "VisitUnallocated"},
+ { "_vllqmp",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"000xxxx"_b, "stxp_sp32_ldstexcl"},
+ {"001xxxx"_b, "ldxp_lp32_ldstexcl"},
+ {"0101111"_b, "cas_c32_ldstexcl"},
+ {"0111111"_b, "casa_c32_ldstexcl"},
+ {"100xxxx"_b, "stxp_sp64_ldstexcl"},
+ {"101xxxx"_b, "ldxp_lp64_ldstexcl"},
+ {"1101111"_b, "cas_c64_ldstexcl"},
+ {"1111111"_b, "casa_c64_ldstexcl"},
},
},
- { "UnallocConditionalCompareImmediate",
- {10, 4, 29},
- { {"1xx", "VisitUnallocated"},
- {"x1x", "VisitUnallocated"},
- {"xx0", "VisitUnallocated"},
- {"otherwise", "VisitConditionalCompareImmediate"},
+ { "_vlrhpy",
+ {30, 23, 22, 13, 4},
+ { {"0000x"_b, "ld1sb_z_p_ai_s"},
+ {"0001x"_b, "ldff1sb_z_p_ai_s"},
+ {"0010x"_b, "ld1rb_z_p_bi_u8"},
+ {"0011x"_b, "ld1rb_z_p_bi_u16"},
+ {"0100x"_b, "ld1sh_z_p_ai_s"},
+ {"0101x"_b, "ldff1sh_z_p_ai_s"},
+ {"0110x"_b, "ld1rsw_z_p_bi_s64"},
+ {"0111x"_b, "ld1rh_z_p_bi_u16"},
+ {"1000x"_b, "ld1sb_z_p_ai_d"},
+ {"1001x"_b, "ldff1sb_z_p_ai_d"},
+ {"10100"_b, "prfb_i_p_bz_d_64_scaled"},
+ {"10110"_b, "prfh_i_p_bz_d_64_scaled"},
+ {"1100x"_b, "ld1sh_z_p_ai_d"},
+ {"1101x"_b, "ldff1sh_z_p_ai_d"},
+ {"1110x"_b, "ld1sh_z_p_bz_d_64_scaled"},
+ {"1111x"_b, "ldff1sh_z_p_bz_d_64_scaled"},
},
},
- { "UnallocConditionalCompareRegister",
- {10, 4, 29},
- { {"1xx", "VisitUnallocated"},
- {"x1x", "VisitUnallocated"},
- {"xx0", "VisitUnallocated"},
- {"otherwise", "VisitConditionalCompareRegister"},
+ { "_vlrrtz",
+ {30, 23, 22},
+ { {"001"_b, "bfm_64m_bitfield"},
},
},
- { "UnallocConditionalSelect",
- {11, 29},
- { {"00", "VisitConditionalSelect"},
- {"otherwise", "VisitUnallocated"},
+ { "_vlsmsn",
+ {22, 20, 19, 18, 17, 16},
+ { {"111000"_b, "fcmle_asisdmiscfp16_fz"},
+ {"111001"_b, "frsqrte_asisdmiscfp16_r"},
+ {"x00000"_b, "fcmle_asisdmisc_fz"},
+ {"x00001"_b, "frsqrte_asisdmisc_r"},
},
},
- { "UnallocDataProcessing1Source",
- {31, 16, 14, 13, 12, 11, 10},
- { {"x0xx11x", "VisitUnallocated"},
- {"0000011", "VisitUnallocated"},
- {"1001xxx", "VisitUnallocated"},
- {"x01xxxx", "VisitUnallocated"},
- {"x0x1xxx", "VisitUnallocated"},
- {"01xxxxx", "VisitUnallocated"},
- {"111xx1x", "VisitUnallocated"},
- {"111x1xx", "VisitUnallocated"},
- {"1111xxx", "VisitUnallocated"},
- {"otherwise", "UnallocDataProcessing1Source_2"},
+ { "_vlzrlm",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "mvni_asimdimm_l_sl"},
+ {"00x100"_b, "sri_asimdshf_r"},
+ {"00x110"_b, "sqshlu_asimdshf_r"},
+ {"010x00"_b, "sri_asimdshf_r"},
+ {"010x10"_b, "sqshlu_asimdshf_r"},
+ {"011100"_b, "sri_asimdshf_r"},
+ {"011110"_b, "sqshlu_asimdshf_r"},
+ {"0x1000"_b, "sri_asimdshf_r"},
+ {"0x1010"_b, "sqshlu_asimdshf_r"},
},
},
- { "UnallocDataProcessing1Source_2",
- {29, 20, 19, 18, 17, 15},
- { {"000000", "VisitDataProcessing1Source"},
- {"otherwise", "VisitUnallocated"},
+ { "_vmjgmg",
+ {30, 23, 22},
+ { {"000"_b, "stxrb_sr32_ldstexcl"},
+ {"001"_b, "ldxrb_lr32_ldstexcl"},
+ {"010"_b, "stllrb_sl32_ldstexcl"},
+ {"011"_b, "ldlarb_lr32_ldstexcl"},
+ {"100"_b, "stxrh_sr32_ldstexcl"},
+ {"101"_b, "ldxrh_lr32_ldstexcl"},
+ {"110"_b, "stllrh_sl32_ldstexcl"},
+ {"111"_b, "ldlarh_lr32_ldstexcl"},
},
},
- { "UnallocDataProcessing2Source",
- {31, 14, 13, 12, 11, 10},
- { {"x0000x", "VisitUnallocated"},
- {"x11xxx", "VisitUnallocated"},
- {"010x11", "VisitUnallocated"},
- {"110xx0", "VisitUnallocated"},
- {"110x0x", "VisitUnallocated"},
- {"otherwise", "UnallocDataProcessing2Source_2"},
+ { "_vmjtrx",
+ {23, 22, 12},
+ { {"001"_b, "sudot_asimdelem_d"},
+ {"011"_b, "bfdot_asimdelem_e"},
+ {"101"_b, "usdot_asimdelem_d"},
+ {"111"_b, "bfmlal_asimdelem_f"},
+ {"xx0"_b, "sdot_asimdelem_d"},
},
},
- { "UnallocDataProcessing2Source_2",
- {29, 15},
- { {"00", "VisitDataProcessing2Source"},
- {"otherwise", "VisitUnallocated"},
+ { "_vmjzyk",
+ {30, 23, 22},
+ { {"000"_b, "stp_32_ldstpair_off"},
+ {"001"_b, "ldp_32_ldstpair_off"},
+ {"010"_b, "stp_32_ldstpair_pre"},
+ {"011"_b, "ldp_32_ldstpair_pre"},
+ {"100"_b, "stgp_64_ldstpair_off"},
+ {"101"_b, "ldpsw_64_ldstpair_off"},
+ {"110"_b, "stgp_64_ldstpair_pre"},
+ {"111"_b, "ldpsw_64_ldstpair_pre"},
},
},
- { "UnallocDataProcessing3Source",
- {23, 22, 21, 15, 31},
- { {"00100", "VisitUnallocated"},
- {"00110", "VisitUnallocated"},
- {"01000", "VisitUnallocated"},
- {"0101x", "VisitUnallocated"},
- {"011xx", "VisitUnallocated"},
- {"100xx", "VisitUnallocated"},
- {"10100", "VisitUnallocated"},
- {"10110", "VisitUnallocated"},
- {"11000", "VisitUnallocated"},
- {"1101x", "VisitUnallocated"},
- {"111xx", "VisitUnallocated"},
- {"otherwise", "UnallocDataProcessing3Source_2"},
+ { "_vmplgv",
+ {12},
+ { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
},
},
- { "UnallocDataProcessing3Source_2",
- {30, 29},
- { {"00", "VisitDataProcessing3Source"},
- {"otherwise", "VisitUnallocated"},
+ { "_vmpnlv",
+ {11, 10, 9, 8, 7, 6},
+ { {"000000"_b, "wfit_only_systeminstrswithreg"},
},
},
- { "UnallocEvaluateIntoFlags",
- {31, 30, 29, 20, 19, 18},
- { {"001000", "UnallocEvaluateIntoFlags_2"},
- {"otherwise", "VisitUnallocated"},
+ { "_vnpqrh",
+ {30, 23, 22},
+ { {"000"_b, "stp_s_ldstpair_off"},
+ {"001"_b, "ldp_s_ldstpair_off"},
+ {"010"_b, "stp_s_ldstpair_pre"},
+ {"011"_b, "ldp_s_ldstpair_pre"},
+ {"100"_b, "stp_d_ldstpair_off"},
+ {"101"_b, "ldp_d_ldstpair_off"},
+ {"110"_b, "stp_d_ldstpair_pre"},
+ {"111"_b, "ldp_d_ldstpair_pre"},
},
},
- { "UnallocEvaluateIntoFlags_2",
- {17, 16, 15, 4, 3, 2, 1, 0},
- { {"00001101", "VisitEvaluateIntoFlags"},
- {"otherwise", "VisitUnallocated"},
+ { "_vnrnmg",
+ {17},
+ { {"0"_b, "st4_asisdlse_r4"},
},
},
- { "UnallocException",
- {23, 22, 21, 1, 0},
- { {"00000", "VisitUnallocated"},
- {"001x1", "VisitUnallocated"},
- {"0011x", "VisitUnallocated"},
- {"010x1", "VisitUnallocated"},
- {"0101x", "VisitUnallocated"},
- {"011xx", "VisitUnallocated"},
- {"100xx", "VisitUnallocated"},
- {"10100", "VisitUnallocated"},
- {"11xxx", "VisitUnallocated"},
- {"otherwise", "UnallocException_2"},
+ { "_vpkhvh",
+ {17},
+ { {"0"_b, "st2_asisdlso_h2_2h"},
},
},
- { "UnallocException_2",
- {4, 3, 2},
- { {"000", "VisitException"},
- {"otherwise", "VisitUnallocated"},
+ { "_vpkptr",
+ {30, 23, 22},
+ { {"000"_b, "stnp_32_ldstnapair_offs"},
+ {"001"_b, "ldnp_32_ldstnapair_offs"},
+ {"010"_b, "stp_32_ldstpair_post"},
+ {"011"_b, "ldp_32_ldstpair_post"},
+ {"110"_b, "stgp_64_ldstpair_post"},
+ {"111"_b, "ldpsw_64_ldstpair_post"},
},
},
- { "UnallocExtract",
- {30, 29, 21},
- { {"000", "UnallocExtract_2"},
- {"otherwise", "VisitUnallocated"},
+ { "_vpmxrj",
+ {13},
+ { {"0"_b, "histcnt_z_p_zz"},
+ {"1"_b, "_jxszhy"},
},
},
- { "UnallocExtract_2",
- {31, 22, 15},
- { {"10x", "VisitUnallocated"},
- {"01x", "VisitUnallocated"},
- {"0x1", "VisitUnallocated"},
- {"otherwise", "VisitExtract"},
+ { "_vppthj",
+ {30, 23},
+ { {"00"_b, "add_32_addsub_imm"},
+ {"10"_b, "sub_32_addsub_imm"},
},
},
- { "UnallocFPCompare",
- {31, 29, 15, 14, 2, 1, 0},
- { {"0000000", "UnallocFPCompare_2"},
- {"otherwise", "VisitUnallocated"},
+ { "_vprkpq",
+ {13, 12, 11, 10},
+ { {"0000"_b, "saddwb_z_zz"},
+ {"0001"_b, "saddwt_z_zz"},
+ {"0010"_b, "uaddwb_z_zz"},
+ {"0011"_b, "uaddwt_z_zz"},
+ {"0100"_b, "ssubwb_z_zz"},
+ {"0101"_b, "ssubwt_z_zz"},
+ {"0110"_b, "usubwb_z_zz"},
+ {"0111"_b, "usubwt_z_zz"},
+ {"1000"_b, "sqdmullb_z_zz"},
+ {"1001"_b, "sqdmullt_z_zz"},
+ {"1010"_b, "pmullb_z_zz"},
+ {"1011"_b, "pmullt_z_zz"},
+ {"1100"_b, "smullb_z_zz"},
+ {"1101"_b, "smullt_z_zz"},
+ {"1110"_b, "umullb_z_zz"},
+ {"1111"_b, "umullt_z_zz"},
},
},
- { "UnallocFPCompare_2",
- {23, 22},
- { {"10", "VisitUnallocated"},
- {"otherwise", "VisitFPCompare"},
+ { "_vpxvjs",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "fcvtns_32s_float2int"},
+ {"00001"_b, "fcvtnu_32s_float2int"},
+ {"00010"_b, "scvtf_s32_float2int"},
+ {"00011"_b, "ucvtf_s32_float2int"},
+ {"00100"_b, "fcvtas_32s_float2int"},
+ {"00101"_b, "fcvtau_32s_float2int"},
+ {"00110"_b, "fmov_32s_float2int"},
+ {"00111"_b, "fmov_s32_float2int"},
+ {"01000"_b, "fcvtps_32s_float2int"},
+ {"01001"_b, "fcvtpu_32s_float2int"},
+ {"10000"_b, "fcvtms_32s_float2int"},
+ {"10001"_b, "fcvtmu_32s_float2int"},
+ {"11000"_b, "fcvtzs_32s_float2int"},
+ {"11001"_b, "fcvtzu_32s_float2int"},
+ },
+ },
+
+ { "_vpykkg",
+ {23, 22, 10},
+ { {"000"_b, "ext_asimdext_only"},
+ {"001"_b, "_jnmgrh"},
+ {"011"_b, "_vytgtz"},
+ {"111"_b, "_jrnlzs"},
},
},
- { "UnallocFPConditionalCompare",
- {31, 29, 23, 22},
- { {"xx10", "VisitUnallocated"},
- {"x1xx", "VisitUnallocated"},
- {"1xxx", "VisitUnallocated"},
- {"otherwise", "VisitFPConditionalCompare"},
+ { "_vqlytp",
+ {12},
+ { {"0"_b, "st3_asisdlsop_dx3_r3d"},
},
},
- { "UnallocFPConditionalSelect",
- {31, 29, 23, 22},
- { {"xx10", "VisitUnallocated"},
- {"x1xx", "VisitUnallocated"},
- {"1xxx", "VisitUnallocated"},
- {"otherwise", "VisitFPConditionalSelect"},
+ { "_vqqrjl",
+ {23, 22, 20, 19, 13, 11, 10},
+ { {"0001001"_b, "shl_asisdshf_r"},
+ {"0001101"_b, "sqshl_asisdshf_r"},
+ {"001x001"_b, "shl_asisdshf_r"},
+ {"001x101"_b, "sqshl_asisdshf_r"},
+ {"00xx0x0"_b, "fmls_asisdelem_rh_h"},
+ {"01xx001"_b, "shl_asisdshf_r"},
+ {"01xx101"_b, "sqshl_asisdshf_r"},
+ {"1xxx0x0"_b, "fmls_asisdelem_r_sd"},
+ {"xxxx1x0"_b, "sqdmlsl_asisdelem_l"},
},
},
- { "UnallocFPDataProcessing1Source",
- {31, 29, 20},
- { {"000", "UnallocFPDataProcessing1Source_2"},
- {"otherwise", "VisitUnallocated"},
+ { "_vqvqhp",
+ {30, 23, 22},
+ { {"000"_b, "str_32_ldst_pos"},
+ {"001"_b, "ldr_32_ldst_pos"},
+ {"010"_b, "ldrsw_64_ldst_pos"},
+ {"100"_b, "str_64_ldst_pos"},
+ {"101"_b, "ldr_64_ldst_pos"},
+ {"110"_b, "prfm_p_ldst_pos"},
},
},
- { "UnallocFPDataProcessing1Source_2",
- {23, 22, 19, 18, 17, 16, 15},
- { {"0000100", "VisitUnallocated"},
- {"0000110", "VisitUnallocated"},
- {"0001101", "VisitUnallocated"},
- {"00101xx", "VisitUnallocated"},
- {"0011xxx", "VisitUnallocated"},
- {"0100101", "VisitUnallocated"},
- {"0101101", "VisitUnallocated"},
- {"01101xx", "VisitUnallocated"},
- {"0111xxx", "VisitUnallocated"},
- {"10xxxxx", "VisitUnallocated"},
- {"110011x", "VisitUnallocated"},
- {"1101101", "VisitUnallocated"},
- {"111xxxx", "VisitUnallocated"},
- {"otherwise", "VisitFPDataProcessing1Source"},
+ { "_vqzlzt",
+ {30, 23},
+ { {"00"_b, "and_64_log_imm"},
+ {"01"_b, "movn_64_movewide"},
+ {"10"_b, "eor_64_log_imm"},
+ {"11"_b, "movz_64_movewide"},
},
},
- { "UnallocFPDataProcessing2Source",
- {15, 14, 13, 12},
- { {"1xx1", "VisitUnallocated"},
- {"1x1x", "VisitUnallocated"},
- {"11xx", "VisitUnallocated"},
- {"otherwise", "UnallocFPDataProcessing2Source_2"},
+ { "_vsqlkr",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "frintx_asimdmiscfp16_r"},
+ {"0x00001"_b, "frintx_asimdmisc_r"},
+ {"1111001"_b, "frinti_asimdmiscfp16_r"},
+ {"1x00001"_b, "frinti_asimdmisc_r"},
+ {"xx00000"_b, "cmle_asimdmisc_z"},
},
},
- { "UnallocFPDataProcessing2Source_2",
- {31, 29, 23, 22},
- { {"xx10", "VisitUnallocated"},
- {"x1xx", "VisitUnallocated"},
- {"1xxx", "VisitUnallocated"},
- {"otherwise", "VisitFPDataProcessing2Source"},
+ { "_vsqpzr",
+ {23},
+ { {"0"_b, "faddp_asimdsame_only"},
+ {"1"_b, "fabd_asimdsame_only"},
},
},
- { "UnallocFPDataProcessing3Source",
- {31, 29, 23, 22},
- { {"xx10", "VisitUnallocated"},
- {"x1xx", "VisitUnallocated"},
- {"1xxx", "VisitUnallocated"},
- {"otherwise", "VisitFPDataProcessing3Source"},
+ { "_vsvrgt",
+ {17},
+ { {"0"_b, "fadda_v_p_z"},
},
},
- { "UnallocFPFixedPointConvert",
- {23, 22, 20, 19, 17, 16},
- { {"10xxxx", "VisitUnallocated"},
- {"xxx00x", "VisitUnallocated"},
- {"xxx11x", "VisitUnallocated"},
- {"xx0x0x", "VisitUnallocated"},
- {"xx1x1x", "VisitUnallocated"},
- {"otherwise", "UnallocFPFixedPointConvert_2"},
+ { "_vsvtqz",
+ {30, 23, 22},
+ { {"00x"_b, "add_64_addsub_imm"},
+ {"010"_b, "addg_64_addsub_immtags"},
+ {"10x"_b, "sub_64_addsub_imm"},
+ {"110"_b, "subg_64_addsub_immtags"},
},
},
- { "UnallocFPFixedPointConvert_2",
- {29, 18},
- { {"00", "UnallocFPFixedPointConvert_3"},
- {"otherwise", "VisitUnallocated"},
+ { "_vtxyxz",
+ {23, 22, 13, 12, 11, 10},
+ { {"01x1x0"_b, "fcmla_asimdelem_c_h"},
+ {"0x0001"_b, "ushr_asimdshf_r"},
+ {"0x0101"_b, "usra_asimdshf_r"},
+ {"0x1001"_b, "urshr_asimdshf_r"},
+ {"0x1101"_b, "ursra_asimdshf_r"},
+ {"10x1x0"_b, "fcmla_asimdelem_c_s"},
+ {"xx00x0"_b, "mla_asimdelem_r"},
+ {"xx10x0"_b, "umlal_asimdelem_l"},
},
},
- { "UnallocFPFixedPointConvert_3",
- {31, 15},
- { {"00", "VisitUnallocated"},
- {"otherwise", "VisitFPFixedPointConvert"},
+ { "_vvhzhv",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0000000"_b, "swpb_32_memop"},
+ {"000xx10"_b, "strb_32b_ldst_regoff"},
+ {"0010000"_b, "swplb_32_memop"},
+ {"001xx10"_b, "ldrb_32b_ldst_regoff"},
+ {"0100000"_b, "swpab_32_memop"},
+ {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
+ {"0110000"_b, "swpalb_32_memop"},
+ {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
+ {"1000000"_b, "swph_32_memop"},
+ {"100xx10"_b, "strh_32_ldst_regoff"},
+ {"1010000"_b, "swplh_32_memop"},
+ {"101xx10"_b, "ldrh_32_ldst_regoff"},
+ {"1100000"_b, "swpah_32_memop"},
+ {"110xx10"_b, "ldrsh_64_ldst_regoff"},
+ {"1110000"_b, "swpalh_32_memop"},
+ {"111xx10"_b, "ldrsh_32_ldst_regoff"},
},
},
- { "UnallocFPImmediate",
+ { "_vvprhx",
+ {0},
+ { {"0"_b, "blr_64_branch_reg"},
+ },
+ },
+
+ { "_vvrmvg",
+ {12},
+ { {"1"_b, "_typysz"},
+ },
+ },
+
+ { "_vvtnrv",
+ {23, 22, 20, 19, 18},
+ { {"00000"_b, "orr_z_zi"},
+ {"01000"_b, "eor_z_zi"},
+ {"10000"_b, "and_z_zi"},
+ {"11000"_b, "dupm_z_i"},
+ },
+ },
+
+ { "_vvxsxt",
+ {4},
+ { {"0"_b, "ands_p_p_pp_z"},
+ {"1"_b, "bics_p_p_pp_z"},
+ },
+ },
+
+ { "_vxgzqy",
+ {22},
+ { {"0"_b, "ldrsw_64_ldst_regoff"},
+ },
+ },
+
+ { "_vxhgzz",
+ {23, 22, 12, 11, 10},
+ { {"00xxx"_b, "ext_z_zi_des"},
+ {"01xxx"_b, "ext_z_zi_con"},
+ {"10000"_b, "zip1_z_zz_q"},
+ {"10001"_b, "zip2_z_zz_q"},
+ {"10010"_b, "uzp1_z_zz_q"},
+ {"10011"_b, "uzp2_z_zz_q"},
+ {"10110"_b, "trn1_z_zz_q"},
+ {"10111"_b, "trn2_z_zz_q"},
+ },
+ },
+
+ { "_vxsjgg",
+ {30, 22, 11},
+ { {"001"_b, "_pxnnrz"},
+ {"010"_b, "ccmn_32_condcmp_reg"},
+ {"011"_b, "ccmn_32_condcmp_imm"},
+ {"110"_b, "ccmp_32_condcmp_reg"},
+ {"111"_b, "ccmp_32_condcmp_imm"},
+ },
+ },
+
+ { "_vxsvhs",
+ {13, 12},
+ { {"00"_b, "adcs_64_addsub_carry"},
+ },
+ },
+
+ { "_vxylhh",
{23, 22},
- { {"10", "VisitUnallocated"},
- {"otherwise", "UnallocFPImmediate_2"},
+ { {"01"_b, "fcmla_asimdelem_c_h"},
+ {"10"_b, "fcmla_asimdelem_c_s"},
+ },
+ },
+
+ { "_vylhvl",
+ {20, 19, 18, 17, 16, 13},
+ { {"000000"_b, "fabs_h_floatdp1"},
+ {"000010"_b, "fsqrt_h_floatdp1"},
+ {"000100"_b, "fcvt_dh_floatdp1"},
+ {"001000"_b, "frintp_h_floatdp1"},
+ {"001010"_b, "frintz_h_floatdp1"},
+ {"001110"_b, "frinti_h_floatdp1"},
+ },
+ },
+
+ { "_vytgtz",
+ {13, 12, 11},
+ { {"000"_b, "fmaxnmp_asimdsamefp16_only"},
+ {"010"_b, "faddp_asimdsamefp16_only"},
+ {"011"_b, "fmul_asimdsamefp16_only"},
+ {"100"_b, "fcmge_asimdsamefp16_only"},
+ {"101"_b, "facge_asimdsamefp16_only"},
+ {"110"_b, "fmaxp_asimdsamefp16_only"},
+ {"111"_b, "fdiv_asimdsamefp16_only"},
+ },
+ },
+
+ { "_vytxll",
+ {18, 17, 12},
+ { {"000"_b, "st2_asisdlso_d2_2d"},
+ },
+ },
+
+ { "_vyygqs",
+ {23, 22, 20, 19, 12, 11, 10},
+ { {"00x1001"_b, "sqshrun_asisdshf_n"},
+ {"00x1011"_b, "sqrshrun_asisdshf_n"},
+ {"00x1101"_b, "uqshrn_asisdshf_n"},
+ {"00x1111"_b, "uqrshrn_asisdshf_n"},
+ {"00xx1x0"_b, "fmulx_asisdelem_rh_h"},
+ {"010x001"_b, "sqshrun_asisdshf_n"},
+ {"010x011"_b, "sqrshrun_asisdshf_n"},
+ {"010x101"_b, "uqshrn_asisdshf_n"},
+ {"010x111"_b, "uqrshrn_asisdshf_n"},
+ {"0111001"_b, "sqshrun_asisdshf_n"},
+ {"0111011"_b, "sqrshrun_asisdshf_n"},
+ {"0111101"_b, "uqshrn_asisdshf_n"},
+ {"0111111"_b, "uqrshrn_asisdshf_n"},
+ {"0x10001"_b, "sqshrun_asisdshf_n"},
+ {"0x10011"_b, "sqrshrun_asisdshf_n"},
+ {"0x10101"_b, "uqshrn_asisdshf_n"},
+ {"0x10111"_b, "uqrshrn_asisdshf_n"},
+ {"1xxx1x0"_b, "fmulx_asisdelem_r_sd"},
+ },
+ },
+
+ { "_vyztqx",
+ {8},
+ { {"0"_b, "tstart_br_systemresult"},
+ {"1"_b, "ttest_br_systemresult"},
+ },
+ },
+
+ { "_vzjvtv",
+ {23, 22, 12, 11, 10},
+ { {"01001"_b, "bfmmla_z_zzz"},
+ {"10001"_b, "fmmla_z_zzz_s"},
+ {"11001"_b, "fmmla_z_zzz_d"},
+ },
+ },
+
+ { "_vzzvlr",
+ {23, 22, 20, 19, 18, 16, 13},
+ { {"0000000"_b, "_tlzlrj"},
+ {"0000001"_b, "_yhxvhy"},
+ {"0100000"_b, "_hqhzgj"},
+ {"0100001"_b, "_kzrklp"},
+ {"100xxx0"_b, "st2_asisdlsop_bx2_r2b"},
+ {"100xxx1"_b, "st4_asisdlsop_bx4_r4b"},
+ {"1010xx0"_b, "st2_asisdlsop_bx2_r2b"},
+ {"1010xx1"_b, "st4_asisdlsop_bx4_r4b"},
+ {"10110x0"_b, "st2_asisdlsop_bx2_r2b"},
+ {"10110x1"_b, "st4_asisdlsop_bx4_r4b"},
+ {"1011100"_b, "st2_asisdlsop_bx2_r2b"},
+ {"1011101"_b, "st4_asisdlsop_bx4_r4b"},
+ {"1011110"_b, "_mykjss"},
+ {"1011111"_b, "_xkkggt"},
+ {"110xxx0"_b, "ld2_asisdlsop_bx2_r2b"},
+ {"110xxx1"_b, "ld4_asisdlsop_bx4_r4b"},
+ {"1110xx0"_b, "ld2_asisdlsop_bx2_r2b"},
+ {"1110xx1"_b, "ld4_asisdlsop_bx4_r4b"},
+ {"11110x0"_b, "ld2_asisdlsop_bx2_r2b"},
+ {"11110x1"_b, "ld4_asisdlsop_bx4_r4b"},
+ {"1111100"_b, "ld2_asisdlsop_bx2_r2b"},
+ {"1111101"_b, "ld4_asisdlsop_bx4_r4b"},
+ {"1111110"_b, "_gvstrp"},
+ {"1111111"_b, "_qtgvhn"},
+ },
+ },
+
+ { "_xgvgmk",
+ {23, 22, 4},
+ { {"000"_b, "fccmp_s_floatccmp"},
+ {"001"_b, "fccmpe_s_floatccmp"},
+ {"010"_b, "fccmp_d_floatccmp"},
+ {"011"_b, "fccmpe_d_floatccmp"},
+ {"110"_b, "fccmp_h_floatccmp"},
+ {"111"_b, "fccmpe_h_floatccmp"},
+ },
+ },
+
+ { "_xhkgqh",
+ {30, 23, 22},
+ { {"000"_b, "stp_64_ldstpair_off"},
+ {"001"_b, "ldp_64_ldstpair_off"},
+ {"010"_b, "stp_64_ldstpair_pre"},
+ {"011"_b, "ldp_64_ldstpair_pre"},
+ },
+ },
+
+ { "_xhktsk",
+ {22},
+ { {"0"_b, "smullt_z_zzi_s"},
+ {"1"_b, "smullt_z_zzi_d"},
+ },
+ },
+
+ { "_xhlhmh",
+ {4},
+ { {"0"_b, "cmplo_p_p_zi"},
+ {"1"_b, "cmpls_p_p_zi"},
},
},
- { "UnallocFPImmediate_2",
- {31, 29, 9, 8, 7, 6, 5},
- { {"0000000", "VisitFPImmediate"},
- {"otherwise", "VisitUnallocated"},
+ { "_xhltxn",
+ {12, 10},
+ { {"00"_b, "_jqtltz"},
+ {"01"_b, "_rkvyqk"},
+ {"10"_b, "_zpnsrv"},
+ {"11"_b, "_lhvtrp"},
},
},
- { "UnallocFPIntegerConvert",
- {29},
- { {"0", "UnallocFPIntegerConvert_2"},
- {"1", "VisitUnallocated"},
+ { "_xhmpmy",
+ {4},
+ { {"0"_b, "and_p_p_pp_z"},
+ {"1"_b, "bic_p_p_pp_z"},
},
},
- { "UnallocFPIntegerConvert_2",
- {31, 23, 22, 20, 19, 18, 17, 16},
- { {"0001x11x", "VisitUnallocated"},
- {"0010x11x", "VisitUnallocated"},
- {"0011011x", "VisitUnallocated"},
- {"00111111", "VisitUnallocated"},
- {"010xx11x", "VisitUnallocated"},
- {"100xx11x", "VisitUnallocated"},
- {"1011x11x", "VisitUnallocated"},
- {"101x111x", "VisitUnallocated"},
- {"1101x11x", "VisitUnallocated"},
- {"110x011x", "VisitUnallocated"},
- {"xxx1x01x", "VisitUnallocated"},
- {"xxx1x10x", "VisitUnallocated"},
- {"xxxx110x", "VisitUnallocated"},
- {"xxxx101x", "VisitUnallocated"},
- {"otherwise", "VisitFPIntegerConvert"},
+ { "_xhvtjg",
+ {11},
+ { {"0"_b, "_mpyklp"},
},
},
- { "UnallocLoadLiteral",
- {26, 31, 30},
- { {"111", "VisitUnallocated"},
- {"otherwise", "VisitLoadLiteral"},
+ { "_xhxrnt",
+ {30},
+ { {"0"_b, "_zxhhny"},
+ {"1"_b, "_lhpgsn"},
},
},
- { "UnallocLoadStoreExclusive",
- {31, 23, 21, 14, 13, 12, 11, 10},
- { {"001xxxx0", "VisitUnallocated"},
- {"001xxx0x", "VisitUnallocated"},
- {"001xx0xx", "VisitUnallocated"},
- {"001x0xxx", "VisitUnallocated"},
- {"0010xxxx", "VisitUnallocated"},
- {"x11xxxx0", "VisitUnallocated"},
- {"x11xxx0x", "VisitUnallocated"},
- {"x11xx0xx", "VisitUnallocated"},
- {"x11x0xxx", "VisitUnallocated"},
- {"x110xxxx", "VisitUnallocated"},
- {"otherwise", "VisitLoadStoreExclusive"},
+ { "_xjghst",
+ {13, 12, 11, 10},
+ { {"0000"_b, "_kvmrng"},
+ {"0001"_b, "_vkyngx"},
+ {"0011"_b, "_lxqynh"},
+ {"0100"_b, "_kjngjl"},
+ {"0101"_b, "_xmqgmz"},
+ {"0110"_b, "uzp1_asimdperm_only"},
+ {"0111"_b, "_shzysp"},
+ {"1000"_b, "_strkph"},
+ {"1001"_b, "_jpvljz"},
+ {"1010"_b, "trn1_asimdperm_only"},
+ {"1011"_b, "_jryylt"},
+ {"1100"_b, "_grxzzg"},
+ {"1101"_b, "_lnnyzt"},
+ {"1110"_b, "zip1_asimdperm_only"},
+ {"1111"_b, "_szttjy"},
},
},
- { "UnallocLoadStorePAC",
- {31, 30, 26},
- { {"110", "VisitLoadStorePAC"},
- {"otherwise", "VisitUnallocated"},
+ { "_xjxppp",
+ {1, 0},
+ { {"11"_b, "brabz_64_branch_reg"},
},
},
- { "UnallocLoadStoreRCpcUnscaledOffset",
- {31, 30, 23, 22, 11, 10},
- { {"xxxxx1", "VisitUnallocated"},
- {"xxxx1x", "VisitUnallocated"},
- {"101100", "VisitUnallocated"},
- {"111000", "VisitUnallocated"},
- {"111100", "VisitUnallocated"},
- {"otherwise", "VisitLoadStoreRCpcUnscaledOffset"},
+ { "_xkkggt",
+ {17},
+ { {"0"_b, "st4_asisdlsop_bx4_r4b"},
+ {"1"_b, "st4_asisdlsop_b4_i4b"},
},
},
- { "UnallocLoadStorePairNonTemporal",
- {26, 31, 30, 22},
- { {"001x", "VisitUnallocated"},
- {"x11x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStorePairNonTemporal"},
+ { "_xlhjhx",
+ {30},
+ { {"0"_b, "bl_only_branch_imm"},
+ {"1"_b, "_zhrtts"},
},
},
- { "UnallocLoadStorePairOffset",
- {26, 31, 30, 22},
- { {"0010", "VisitUnallocated"},
- {"x11x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStorePairOffset"},
+ { "_xmqgmz",
+ {23, 22},
+ { {"01"_b, "fadd_asimdsamefp16_only"},
+ {"11"_b, "fsub_asimdsamefp16_only"},
},
},
- { "UnallocLoadStorePairPostIndex",
- {26, 31, 30, 22},
- { {"0010", "VisitUnallocated"},
- {"x11x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStorePairPostIndex"},
+ { "_xmqvpl",
+ {12},
+ { {"0"_b, "ld1_asisdlsop_dx1_r1d"},
},
},
- { "UnallocLoadStorePairPreIndex",
- {26, 31, 30, 22},
- { {"0010", "VisitUnallocated"},
- {"x11x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStorePairPreIndex"},
+ { "_xmtlmj",
+ {23, 22, 20, 19, 11},
+ { {"00010"_b, "srshr_asisdshf_r"},
+ {"001x0"_b, "srshr_asisdshf_r"},
+ {"01xx0"_b, "srshr_asisdshf_r"},
},
},
- { "UnallocLoadStorePostIndex",
- {26, 23, 22, 31, 30},
- { {"01011", "VisitUnallocated"},
- {"0111x", "VisitUnallocated"},
- {"11xx1", "VisitUnallocated"},
- {"11x1x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStorePostIndex"},
+ { "_xmxpnx",
+ {10},
+ { {"0"_b, "sri_z_zzi"},
+ {"1"_b, "sli_z_zzi"},
},
},
- { "UnallocLoadStorePreIndex",
- {26, 23, 22, 31, 30},
- { {"01011", "VisitUnallocated"},
- {"0111x", "VisitUnallocated"},
- {"11xx1", "VisitUnallocated"},
- {"11x1x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStorePreIndex"},
+ { "_xnsrny",
+ {30, 23, 22},
+ { {"000"_b, "madd_64a_dp_3src"},
+ {"001"_b, "smulh_64_dp_3src"},
+ {"011"_b, "umulh_64_dp_3src"},
},
},
- { "UnallocLoadStoreRegisterOffset",
- {14},
- { {"0", "VisitUnallocated"},
- {"1", "UnallocLoadStoreRegisterOffset_2"},
+ { "_xpkkpn",
+ {17},
+ { {"1"_b, "frsqrte_z_z"},
},
},
- { "UnallocLoadStoreRegisterOffset_2",
- {26, 23, 22, 31, 30},
- { {"0111x", "VisitUnallocated"},
- {"11xx1", "VisitUnallocated"},
- {"11x1x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStoreRegisterOffset"},
+ { "_xpmvjv",
+ {13, 12},
+ { {"00"_b, "sqshl_asisdsame_only"},
+ {"01"_b, "sqrshl_asisdsame_only"},
},
},
- { "UnallocLoadStoreUnscaledOffset",
- {26, 23, 22, 31, 30},
- { {"0111x", "VisitUnallocated"},
- {"11xx1", "VisitUnallocated"},
- {"11x1x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStoreUnscaledOffset"},
+ { "_xpqglq",
+ {4},
+ { {"0"_b, "cmpeq_p_p_zi"},
+ {"1"_b, "cmpne_p_p_zi"},
},
},
- { "UnallocLoadStoreUnsignedOffset",
- {26, 23, 22, 31, 30},
- { {"0111x", "VisitUnallocated"},
- {"11xx1", "VisitUnallocated"},
- {"11x1x", "VisitUnallocated"},
- {"otherwise", "VisitLoadStoreUnsignedOffset"},
+ { "_xprlgy",
+ {30, 23, 22, 11, 10},
+ { {"00010"_b, "str_s_ldst_regoff"},
+ {"00110"_b, "ldr_s_ldst_regoff"},
+ {"10010"_b, "str_d_ldst_regoff"},
+ {"10110"_b, "ldr_d_ldst_regoff"},
},
},
- { "UnallocLogicalImmediate",
- {31, 22},
- { {"01", "VisitUnallocated"},
- {"otherwise", "VisitLogicalImmediate"},
+ { "_xpvpqq",
+ {23, 22, 11, 10, 4, 3, 2},
+ { {"0000000"_b, "_hngpxg"},
+ {"0010111"_b, "_gnytkh"},
+ {"0011111"_b, "_xjxppp"},
+ {"0100000"_b, "_nnhprs"},
+ {"0110111"_b, "_hmtxlh"},
+ {"0111111"_b, "_qtxypt"},
+ {"1000000"_b, "_rmltms"},
+ {"1010111"_b, "_qqpkkm"},
+ {"1011111"_b, "_klnhpj"},
},
},
- { "UnallocLogicalShifted",
- {31, 15},
- { {"01", "VisitUnallocated"},
- {"otherwise", "VisitLogicalShifted"},
+ { "_xqgxjp",
+ {18, 17, 16, 13, 12, 11, 10, 9, 7, 6, 5},
+ { {"01111000011"_b, "_vyztqx"},
},
},
- { "UnallocMoveWideImmediate",
- {30, 29},
- { {"01", "VisitUnallocated"},
- {"otherwise", "UnallocMoveWideImmediate_2"},
+ { "_xqhgkk",
+ {30},
+ { {"0"_b, "b_only_branch_imm"},
},
},
- { "UnallocMoveWideImmediate_2",
- {31, 22},
- { {"01", "VisitUnallocated"},
- {"otherwise", "VisitMoveWideImmediate"},
+ { "_xqjrgk",
+ {12},
+ { {"0"_b, "ld4_asisdlsop_dx4_r4d"},
},
},
- { "UnallocNEON2RegMisc",
- {16, 15, 14, 13, 12, 23, 22, 29},
- { {"00001xx1", "VisitUnallocated"},
- {"001011x1", "VisitUnallocated"},
- {"01010xx1", "VisitUnallocated"},
- {"011xx0xx", "VisitUnallocated"},
- {"011101x1", "VisitUnallocated"},
- {"1000xxxx", "VisitUnallocated"},
- {"10011xx0", "VisitUnallocated"},
- {"10101xxx", "VisitUnallocated"},
- {"101101xx", "VisitUnallocated"},
- {"101110x1", "VisitUnallocated"},
- {"101111xx", "VisitUnallocated"},
- {"110001x1", "VisitUnallocated"},
- {"111101xx", "VisitUnallocated"},
- {"111111x0", "VisitUnallocated"},
- {"otherwise", "VisitNEON2RegMisc"},
+ { "_xrhhjz",
+ {11},
+ { {"0"_b, "_hzxjsp"},
},
},
- { "UnallocNEON2RegMiscFP16",
- {29, 23, 22, 16, 15, 14, 13, 12},
- { {"xx0xxxxx", "VisitUnallocated"},
- {"xxx00xxx", "VisitUnallocated"},
- {"xxx010xx", "VisitUnallocated"},
- {"xxx10xxx", "VisitUnallocated"},
- {"xxx11110", "VisitUnallocated"},
- {"x0x011xx", "VisitUnallocated"},
- {"x0x11111", "VisitUnallocated"},
- {"x1x11100", "VisitUnallocated"},
- {"01x11111", "VisitUnallocated"},
- {"11x01110", "VisitUnallocated"},
- {"11x11000", "VisitUnallocated"},
- {"otherwise", "VisitNEON2RegMiscFP16"},
+ { "_xrhmtg",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "stur_s_ldst_unscaled"},
+ {"00001"_b, "str_s_ldst_immpost"},
+ {"00011"_b, "str_s_ldst_immpre"},
+ {"00100"_b, "ldur_s_ldst_unscaled"},
+ {"00101"_b, "ldr_s_ldst_immpost"},
+ {"00111"_b, "ldr_s_ldst_immpre"},
+ {"10000"_b, "stur_d_ldst_unscaled"},
+ {"10001"_b, "str_d_ldst_immpost"},
+ {"10011"_b, "str_d_ldst_immpre"},
+ {"10100"_b, "ldur_d_ldst_unscaled"},
+ {"10101"_b, "ldr_d_ldst_immpost"},
+ {"10111"_b, "ldr_d_ldst_immpre"},
},
},
- { "UnallocNEON3Different",
- {15, 14, 13, 12, 29},
- { {"10011", "VisitUnallocated"},
- {"10111", "VisitUnallocated"},
- {"11011", "VisitUnallocated"},
- {"11101", "VisitUnallocated"},
- {"1111x", "VisitUnallocated"},
- {"otherwise", "VisitNEON3Different"},
- },
- },
-
- { "UnallocNEON3Same",
- {29, 23, 22, 15, 14, 13, 12, 11},
- { {"00111101", "VisitUnallocated"},
- {"01x11011", "VisitUnallocated"},
- {"01x11100", "VisitUnallocated"},
- {"01111101", "VisitUnallocated"},
- {"1xx10111", "VisitUnallocated"},
- {"10111001", "VisitUnallocated"},
- {"11x11011", "VisitUnallocated"},
- {"11x11111", "VisitUnallocated"},
- {"11111001", "VisitUnallocated"},
- {"otherwise", "VisitNEON3Same"},
- },
- },
-
- { "UnallocNEON3SameExtra",
- {29, 14, 13, 12, 11},
- { {"x0011", "VisitUnallocated"},
- {"x01xx", "VisitUnallocated"},
- {"00000", "VisitUnallocated"},
- {"00001", "VisitUnallocated"},
- {"01xxx", "VisitUnallocated"},
- {"111x1", "VisitUnallocated"},
- {"otherwise", "VisitNEON3SameExtra"},
- },
- },
-
- { "UnallocNEON3SameFP16",
- {29, 23, 13, 12, 11},
- { {"00101", "VisitUnallocated"},
- {"01011", "VisitUnallocated"},
- {"01100", "VisitUnallocated"},
- {"01101", "VisitUnallocated"},
- {"10001", "VisitUnallocated"},
- {"11001", "VisitUnallocated"},
- {"11011", "VisitUnallocated"},
- {"11111", "VisitUnallocated"},
- {"otherwise", "VisitNEON3SameFP16"},
- },
- },
-
- { "UnallocNEONAcrossLanes",
- {29, 23, 22, 16, 15, 14, 13, 12},
- { {"xxx0000x", "VisitUnallocated"},
- {"xxx00010", "VisitUnallocated"},
- {"xxx001xx", "VisitUnallocated"},
- {"xxx0100x", "VisitUnallocated"},
- {"xxx01011", "VisitUnallocated"},
- {"xxx01101", "VisitUnallocated"},
- {"xxx01110", "VisitUnallocated"},
- {"xxx10xxx", "VisitUnallocated"},
- {"xxx1100x", "VisitUnallocated"},
- {"xxx111xx", "VisitUnallocated"},
- {"00101100", "VisitUnallocated"},
- {"00101111", "VisitUnallocated"},
- {"01101100", "VisitUnallocated"},
- {"01101111", "VisitUnallocated"},
- {"1xx11011", "VisitUnallocated"},
- {"otherwise", "VisitNEONAcrossLanes"},
- },
- },
-
- { "UnallocNEONByIndexedElement",
- {29, 23, 22, 15, 14, 13, 12},
- { {"0010001", "VisitUnallocated"},
- {"0010101", "VisitUnallocated"},
- {"0011001", "VisitUnallocated"},
- {"00x0000", "VisitUnallocated"},
- {"00x0100", "VisitUnallocated"},
- {"0xx1111", "VisitUnallocated"},
- {"1000001", "VisitUnallocated"},
- {"1000011", "VisitUnallocated"},
- {"1000101", "VisitUnallocated"},
- {"1000111", "VisitUnallocated"},
- {"10x1000", "VisitUnallocated"},
- {"10x1100", "VisitUnallocated"},
- {"1110001", "VisitUnallocated"},
- {"1110011", "VisitUnallocated"},
- {"1110101", "VisitUnallocated"},
- {"1110111", "VisitUnallocated"},
- {"1xx1011", "VisitUnallocated"},
- {"x011001", "VisitUnallocated"},
- {"otherwise", "VisitNEONByIndexedElement"},
- },
- },
-
- { "UnallocNEONCopy",
- {14, 13, 12, 11, 30, 29},
- { {"xxxx01", "VisitUnallocated"},
- {"0010x0", "VisitUnallocated"},
- {"001100", "VisitUnallocated"},
- {"0100x0", "VisitUnallocated"},
- {"0110x0", "VisitUnallocated"},
- {"1xxxx0", "VisitUnallocated"},
- {"otherwise", "UnallocNEONCopy_2"},
- },
- },
-
- { "UnallocNEONCopy_2",
- {19, 18, 17, 16},
- { {"0000", "VisitUnallocated"},
- {"otherwise", "VisitNEONCopy"},
+ { "_xrpmzt",
+ {17},
+ { {"0"_b, "st4_asisdlsop_hx4_r4h"},
+ {"1"_b, "st4_asisdlsop_h4_i4h"},
},
},
- { "UnallocNEONExtract",
+ { "_xrxvpr",
{23, 22},
- { {"00", "VisitNEONExtract"},
- {"otherwise", "VisitUnallocated"},
- },
- },
-
- { "UnallocNEONLoadStoreMultiStruct",
- {22, 15, 14, 13, 12},
- { {"00001", "VisitUnallocated"},
- {"00011", "VisitUnallocated"},
- {"00101", "VisitUnallocated"},
- {"01001", "VisitUnallocated"},
- {"01011", "VisitUnallocated"},
- {"011xx", "VisitUnallocated"},
- {"10001", "VisitUnallocated"},
- {"10011", "VisitUnallocated"},
- {"10101", "VisitUnallocated"},
- {"11001", "VisitUnallocated"},
- {"11011", "VisitUnallocated"},
- {"111xx", "VisitUnallocated"},
- {"otherwise", "VisitNEONLoadStoreMultiStruct"},
- },
- },
-
- { "UnallocNEONLoadStoreMultiStructPostIndex",
- {22, 15, 14, 13, 12},
- { {"00001", "VisitUnallocated"},
- {"00011", "VisitUnallocated"},
- {"00101", "VisitUnallocated"},
- {"01001", "VisitUnallocated"},
- {"01011", "VisitUnallocated"},
- {"011xx", "VisitUnallocated"},
- {"10001", "VisitUnallocated"},
- {"10011", "VisitUnallocated"},
- {"10101", "VisitUnallocated"},
- {"11001", "VisitUnallocated"},
- {"11011", "VisitUnallocated"},
- {"111xx", "VisitUnallocated"},
- {"otherwise", "VisitNEONLoadStoreMultiStructPostIndex"},
- },
- },
-
- { "UnallocNEONLoadSingleStruct",
- {21, 15, 14, 13, 12, 11, 10},
- { {"0010xx1", "VisitUnallocated"},
- {"0011xx1", "VisitUnallocated"},
- {"0100101", "VisitUnallocated"},
- {"0100x1x", "VisitUnallocated"},
- {"0101011", "VisitUnallocated"},
- {"01011x1", "VisitUnallocated"},
- {"0101x10", "VisitUnallocated"},
- {"01101xx", "VisitUnallocated"},
- {"01111xx", "VisitUnallocated"},
- {"1010xx1", "VisitUnallocated"},
- {"1011xx1", "VisitUnallocated"},
- {"1100011", "VisitUnallocated"},
- {"11001x1", "VisitUnallocated"},
- {"1100x10", "VisitUnallocated"},
- {"1101011", "VisitUnallocated"},
- {"11011x1", "VisitUnallocated"},
- {"1101x10", "VisitUnallocated"},
- {"11101xx", "VisitUnallocated"},
- {"11111xx", "VisitUnallocated"},
- {"otherwise", "VisitNEONLoadStoreSingleStruct"},
- },
- },
-
- { "UnallocNEONLoadStoreSingleStruct",
+ { {"00"_b, "_spmkmm"},
+ },
+ },
+
+ { "_xryzqs",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0001111"_b, "caspl_cp32_ldstexcl"},
+ {"0011111"_b, "caspal_cp32_ldstexcl"},
+ {"0101111"_b, "caslb_c32_ldstexcl"},
+ {"0111111"_b, "casalb_c32_ldstexcl"},
+ {"1001111"_b, "caspl_cp64_ldstexcl"},
+ {"1011111"_b, "caspal_cp64_ldstexcl"},
+ {"1101111"_b, "caslh_c32_ldstexcl"},
+ {"1111111"_b, "casalh_c32_ldstexcl"},
+ },
+ },
+
+ { "_xsgxyy",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "autizb_64z_dp_1src"},
+ },
+ },
+
+ { "_xstkrn",
+ {20, 19},
+ { {"00"_b, "_hrllsn"},
+ {"01"_b, "_kqvljp"},
+ {"10"_b, "_lxhlkx"},
+ {"11"_b, "_rjysnh"},
+ },
+ },
+
+ { "_xtgtyz",
+ {19, 18, 17, 16},
+ { {"0000"_b, "brkb_p_p_p"},
+ },
+ },
+
+ { "_xtqmyj",
+ {30, 23, 22},
+ { {"000"_b, "orr_32_log_imm"},
+ {"100"_b, "ands_32s_log_imm"},
+ {"110"_b, "movk_32_movewide"},
+ },
+ },
+
+ { "_xtxyxj",
+ {4},
+ { {"0"_b, "orr_p_p_pp_z"},
+ {"1"_b, "orn_p_p_pp_z"},
+ },
+ },
+
+ { "_xtzlzy",
+ {12, 11, 10},
+ { {"000"_b, "fadd_z_zz"},
+ {"001"_b, "fsub_z_zz"},
+ {"010"_b, "fmul_z_zz"},
+ {"011"_b, "ftsmul_z_zz"},
+ {"110"_b, "frecps_z_zz"},
+ {"111"_b, "frsqrts_z_zz"},
+ },
+ },
+
+ { "_xvlnmy",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "autdza_64z_dp_1src"},
+ },
+ },
+
+ { "_xvnyxq",
+ {30, 23, 13, 4},
+ { {"0000"_b, "prfb_i_p_bz_s_x32_scaled"},
+ {"0010"_b, "prfh_i_p_bz_s_x32_scaled"},
+ {"010x"_b, "ld1sh_z_p_bz_s_x32_scaled"},
+ {"011x"_b, "ldff1sh_z_p_bz_s_x32_scaled"},
+ {"1000"_b, "prfb_i_p_bz_d_x32_scaled"},
+ {"1010"_b, "prfh_i_p_bz_d_x32_scaled"},
+ {"110x"_b, "ld1sh_z_p_bz_d_x32_scaled"},
+ {"111x"_b, "ldff1sh_z_p_bz_d_x32_scaled"},
+ },
+ },
+
+ { "_xvppmm",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0xx0xxx"_b, "mla_z_p_zzz"},
+ {"0xx1xxx"_b, "mls_z_p_zzz"},
+ {"1101110"_b, "usdot_z_zzz_s"},
+ {"1xx0000"_b, "smlalb_z_zzz"},
+ {"1xx0001"_b, "smlalt_z_zzz"},
+ {"1xx0010"_b, "umlalb_z_zzz"},
+ {"1xx0011"_b, "umlalt_z_zzz"},
+ {"1xx0100"_b, "smlslb_z_zzz"},
+ {"1xx0101"_b, "smlslt_z_zzz"},
+ {"1xx0110"_b, "umlslb_z_zzz"},
+ {"1xx0111"_b, "umlslt_z_zzz"},
+ {"1xx1000"_b, "sqdmlalb_z_zzz"},
+ {"1xx1001"_b, "sqdmlalt_z_zzz"},
+ {"1xx1010"_b, "sqdmlslb_z_zzz"},
+ {"1xx1011"_b, "sqdmlslt_z_zzz"},
+ {"1xx1100"_b, "sqrdmlah_z_zzz"},
+ {"1xx1101"_b, "sqrdmlsh_z_zzz"},
+ },
+ },
+
+ { "_xxjrsy",
+ {23, 22, 9},
+ { {"000"_b, "rdffr_p_p_f"},
+ {"010"_b, "rdffrs_p_p_f"},
+ },
+ },
+
+ { "_xxkvsy",
+ {30, 22, 11, 10},
+ { {"0000"_b, "csel_64_condsel"},
+ {"0001"_b, "csinc_64_condsel"},
+ {"0111"_b, "_tnxlnl"},
+ {"1000"_b, "csinv_64_condsel"},
+ {"1001"_b, "csneg_64_condsel"},
+ {"1100"_b, "_qjyvln"},
+ {"1101"_b, "_nvthzh"},
+ },
+ },
+
+ { "_xxpqgg",
+ {30, 23, 22},
+ { {"001"_b, "sbfm_64m_bitfield"},
+ {"011"_b, "extr_64_extract"},
+ {"101"_b, "ubfm_64m_bitfield"},
+ },
+ },
+
+ { "_xxpzrl",
+ {13},
+ { {"0"_b, "mls_asimdelem_r"},
+ {"1"_b, "umlsl_asimdelem_l"},
+ },
+ },
+
+ { "_xxxxlh",
+ {4},
+ { {"0"_b, "ccmn_64_condcmp_imm"},
+ },
+ },
+
+ { "_xxyklv",
+ {23, 22, 13, 12, 11, 10},
+ { {"000000"_b, "tbl_asimdtbl_l3_3"},
+ {"000100"_b, "tbx_asimdtbl_l3_3"},
+ {"001000"_b, "tbl_asimdtbl_l4_4"},
+ {"001100"_b, "tbx_asimdtbl_l4_4"},
+ {"xx0110"_b, "uzp2_asimdperm_only"},
+ {"xx1010"_b, "trn2_asimdperm_only"},
+ {"xx1110"_b, "zip2_asimdperm_only"},
+ },
+ },
+
+ { "_xygxsv",
+ {17},
+ { {"0"_b, "ld3_asisdlsop_hx3_r3h"},
+ {"1"_b, "ld3_asisdlsop_h3_i3h"},
+ },
+ },
+
+ { "_xyhmgh",
+ {23, 22, 20, 9},
+ { {"0000"_b, "_xhmpmy"},
+ {"0001"_b, "_qnprqt"},
+ {"0010"_b, "_nnzhgm"},
+ {"0100"_b, "_vvxsxt"},
+ {"0101"_b, "_yzmjhn"},
+ {"0110"_b, "_mkgsly"},
+ {"1000"_b, "_xtxyxj"},
+ {"1001"_b, "_hmtmlq"},
+ {"1010"_b, "_xtgtyz"},
+ {"1100"_b, "_yynmjl"},
+ {"1101"_b, "_sjnspg"},
+ {"1110"_b, "_jzjvtv"},
+ },
+ },
+
+ { "_xyhxzt",
{22},
- { {"0", "UnallocNEONStoreSingleStruct"},
- {"1", "UnallocNEONLoadSingleStruct"},
- },
- },
-
- { "UnallocNEONLoadSingleStructPostIndex",
- {21, 15, 14, 13, 12, 11, 10},
- { {"0010xx1", "VisitUnallocated"},
- {"0011xx1", "VisitUnallocated"},
- {"0100101", "VisitUnallocated"},
- {"0100x1x", "VisitUnallocated"},
- {"0101011", "VisitUnallocated"},
- {"01011x1", "VisitUnallocated"},
- {"0101x10", "VisitUnallocated"},
- {"01101xx", "VisitUnallocated"},
- {"01111xx", "VisitUnallocated"},
- {"1010xx1", "VisitUnallocated"},
- {"1011xx1", "VisitUnallocated"},
- {"1100011", "VisitUnallocated"},
- {"11001x1", "VisitUnallocated"},
- {"1100x10", "VisitUnallocated"},
- {"1101011", "VisitUnallocated"},
- {"11011x1", "VisitUnallocated"},
- {"1101x10", "VisitUnallocated"},
- {"11101xx", "VisitUnallocated"},
- {"11111xx", "VisitUnallocated"},
- {"otherwise", "VisitNEONLoadStoreSingleStructPostIndex"},
- },
- },
-
- { "UnallocNEONLoadStoreSingleStructPostIndex",
+ { {"0"_b, "prfm_p_ldst_regoff"},
+ },
+ },
+
+ { "_xyljvp",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "_yjpstj"},
+ {"01000"_b, "csel_64_condsel"},
+ {"01001"_b, "csinc_64_condsel"},
+ {"01100"_b, "_qghmks"},
+ {"01101"_b, "_qzzlpv"},
+ {"01110"_b, "_syktsg"},
+ {"01111"_b, "_hjtvvm"},
+ {"10000"_b, "_pvrylp"},
+ {"11000"_b, "csinv_64_condsel"},
+ {"11001"_b, "csneg_64_condsel"},
+ {"11100"_b, "_kkgpjl"},
+ {"11101"_b, "_tjtgjy"},
+ {"11110"_b, "_qmzqsy"},
+ {"11111"_b, "_nmkqzt"},
+ },
+ },
+
+ { "_xylmmp",
+ {22, 12},
+ { {"10"_b, "_nkjgpq"},
+ },
+ },
+
+ { "_xyzpvp",
+ {23, 22, 13},
+ { {"100"_b, "fmlsl_asimdelem_lh"},
+ {"xx1"_b, "smlsl_asimdelem_l"},
+ },
+ },
+
+ { "_xzmjxk",
+ {30},
+ { {"1"_b, "_sntzjg"},
+ },
+ },
+
+ { "_xznsqh",
+ {22, 20, 11},
+ { {"000"_b, "cntw_r_s"},
+ {"010"_b, "incw_r_rs"},
+ {"100"_b, "cntd_r_s"},
+ {"110"_b, "incd_r_rs"},
+ },
+ },
+
+ { "_xzyxnr",
+ {30, 23, 22, 11, 10},
+ { {"10001"_b, "stg_64spost_ldsttags"},
+ {"10010"_b, "stg_64soffset_ldsttags"},
+ {"10011"_b, "stg_64spre_ldsttags"},
+ {"10100"_b, "ldg_64loffset_ldsttags"},
+ {"10101"_b, "stzg_64spost_ldsttags"},
+ {"10110"_b, "stzg_64soffset_ldsttags"},
+ {"10111"_b, "stzg_64spre_ldsttags"},
+ {"11001"_b, "st2g_64spost_ldsttags"},
+ {"11010"_b, "st2g_64soffset_ldsttags"},
+ {"11011"_b, "st2g_64spre_ldsttags"},
+ {"11101"_b, "stz2g_64spost_ldsttags"},
+ {"11110"_b, "stz2g_64soffset_ldsttags"},
+ {"11111"_b, "stz2g_64spre_ldsttags"},
+ },
+ },
+
+ { "_xzyylk",
+ {20, 19, 18, 17, 16, 13},
+ { {"000000"_b, "fabs_s_floatdp1"},
+ {"000010"_b, "fsqrt_s_floatdp1"},
+ {"000100"_b, "fcvt_ds_floatdp1"},
+ {"000110"_b, "fcvt_hs_floatdp1"},
+ {"001000"_b, "frintp_s_floatdp1"},
+ {"001010"_b, "frintz_s_floatdp1"},
+ {"001110"_b, "frinti_s_floatdp1"},
+ {"010000"_b, "frint32x_s_floatdp1"},
+ {"010010"_b, "frint64x_s_floatdp1"},
+ },
+ },
+
+ { "_ygjslq",
+ {4, 3, 2, 1, 0},
+ { {"00000"_b, "fcmp_h_floatcmp"},
+ {"01000"_b, "fcmp_hz_floatcmp"},
+ {"10000"_b, "fcmpe_h_floatcmp"},
+ {"11000"_b, "fcmpe_hz_floatcmp"},
+ },
+ },
+
+ { "_ygnypk",
+ {22, 12},
+ { {"10"_b, "_nqlgtn"},
+ },
+ },
+
+ { "_ygpjrl",
+ {13, 12},
+ { {"00"_b, "adc_32_addsub_carry"},
+ },
+ },
+
+ { "_ygxhyg",
+ {23, 22, 4},
+ { {"000"_b, "fccmp_s_floatccmp"},
+ {"001"_b, "fccmpe_s_floatccmp"},
+ {"010"_b, "fccmp_d_floatccmp"},
+ {"011"_b, "fccmpe_d_floatccmp"},
+ {"110"_b, "fccmp_h_floatccmp"},
+ {"111"_b, "fccmpe_h_floatccmp"},
+ },
+ },
+
+ { "_ygyxvx",
+ {18, 17},
+ { {"00"_b, "ld2_asisdlso_s2_2s"},
+ },
+ },
+
+ { "_yhlntp",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "fexpa_z_z"},
+ },
+ },
+
+ { "_yhmlxk",
+ {13, 12, 11, 10},
+ { {"0000"_b, "decp_z_p_z"},
+ {"0010"_b, "decp_r_p_r"},
+ },
+ },
+
+ { "_yhqyzj",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_d_floatimm"},
+ },
+ },
+
+ { "_yhxvhy",
+ {17},
+ { {"0"_b, "st4_asisdlso_b4_4b"},
+ },
+ },
+
+ { "_yjjrgg",
+ {30},
+ { {"0"_b, "cbnz_64_compbranch"},
+ },
+ },
+
+ { "_yjmngt",
+ {30},
+ { {"0"_b, "sel_z_p_zz"},
+ {"1"_b, "_vpmxrj"},
+ },
+ },
+
+ { "_yjpstj",
+ {13, 12},
+ { {"00"_b, "adc_64_addsub_carry"},
+ },
+ },
+
+ { "_yjsjvt",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "_vxsvhs"},
+ {"00001"_b, "_rhzhyz"},
+ {"00100"_b, "_zjsgkm"},
+ {"00110"_b, "_xxxxlh"},
+ {"01100"_b, "_mtjrtt"},
+ {"10000"_b, "_yskkjs"},
+ {"10100"_b, "_mjxzks"},
+ {"10110"_b, "_tpkzxg"},
+ },
+ },
+
+ { "_yjxshz",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "stlurb_32_ldapstl_unscaled"},
+ {"00100"_b, "ldapurb_32_ldapstl_unscaled"},
+ {"01000"_b, "ldapursb_64_ldapstl_unscaled"},
+ {"01100"_b, "ldapursb_32_ldapstl_unscaled"},
+ {"10000"_b, "stlurh_32_ldapstl_unscaled"},
+ {"10100"_b, "ldapurh_32_ldapstl_unscaled"},
+ {"11000"_b, "ldapursh_64_ldapstl_unscaled"},
+ {"11100"_b, "ldapursh_32_ldapstl_unscaled"},
+ },
+ },
+
+ { "_yjxvkp",
+ {18, 17, 12},
+ { {"0x0"_b, "st4_asisdlsop_dx4_r4d"},
+ {"100"_b, "st4_asisdlsop_dx4_r4d"},
+ {"110"_b, "st4_asisdlsop_d4_i4d"},
+ },
+ },
+
+ { "_yjzknm",
+ {13, 12, 11, 10},
+ { {"0000"_b, "uqdecp_z_p_z"},
+ {"0010"_b, "uqdecp_r_p_r_uw"},
+ {"0011"_b, "uqdecp_r_p_r_x"},
+ },
+ },
+
+ { "_yjztsq",
+ {20, 19, 18, 17, 16},
+ { {"11111"_b, "st64b_64l_memop"},
+ },
+ },
+
+ { "_ylhxlt",
+ {30},
+ { {"0"_b, "ldrsw_64_loadlit"},
+ {"1"_b, "prfm_p_loadlit"},
+ },
+ },
+
+ { "_ylnsvy",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "dup_z_r"},
+ {"00100"_b, "insr_z_r"},
+ {"10000"_b, "sunpklo_z_z"},
+ {"10001"_b, "sunpkhi_z_z"},
+ {"10010"_b, "uunpklo_z_z"},
+ {"10011"_b, "uunpkhi_z_z"},
+ {"10100"_b, "insr_z_v"},
+ {"11000"_b, "rev_z_z"},
+ },
+ },
+
+ { "_ylqnqt",
+ {18, 17, 12},
+ { {"000"_b, "ld4_asisdlso_d4_4d"},
+ },
+ },
+
+ { "_ylyskq",
+ {13, 12, 11, 10},
+ { {"0011"_b, "uqadd_asisdsame_only"},
+ {"1010"_b, "_yzqtyl"},
+ {"1011"_b, "uqsub_asisdsame_only"},
+ {"1101"_b, "cmhi_asisdsame_only"},
+ {"1110"_b, "_jxzrxm"},
+ {"1111"_b, "cmhs_asisdsame_only"},
+ },
+ },
+
+ { "_ymgrgx",
+ {22, 20, 19, 18, 17, 16},
+ { {"111001"_b, "ucvtf_asisdmiscfp16_r"},
+ {"x00001"_b, "ucvtf_asisdmisc_r"},
+ {"x10000"_b, "faddp_asisdpair_only_sd"},
+ },
+ },
+
+ { "_ymhgxg",
+ {30, 13},
+ { {"00"_b, "_yrmmmg"},
+ {"01"_b, "_sghgtk"},
+ {"10"_b, "_nxjkqs"},
+ {"11"_b, "_yvyhlh"},
+ },
+ },
+
+ { "_ymhkrx",
+ {30, 23, 22, 13, 4},
+ { {"0000x"_b, "ld1b_z_p_ai_s"},
+ {"0001x"_b, "ldff1b_z_p_ai_s"},
+ {"0010x"_b, "ld1rb_z_p_bi_u32"},
+ {"0011x"_b, "ld1rb_z_p_bi_u64"},
+ {"0100x"_b, "ld1h_z_p_ai_s"},
+ {"0101x"_b, "ldff1h_z_p_ai_s"},
+ {"0110x"_b, "ld1rh_z_p_bi_u32"},
+ {"0111x"_b, "ld1rh_z_p_bi_u64"},
+ {"1000x"_b, "ld1b_z_p_ai_d"},
+ {"1001x"_b, "ldff1b_z_p_ai_d"},
+ {"10100"_b, "prfw_i_p_bz_d_64_scaled"},
+ {"10110"_b, "prfd_i_p_bz_d_64_scaled"},
+ {"1100x"_b, "ld1h_z_p_ai_d"},
+ {"1101x"_b, "ldff1h_z_p_ai_d"},
+ {"1110x"_b, "ld1h_z_p_bz_d_64_scaled"},
+ {"1111x"_b, "ldff1h_z_p_bz_d_64_scaled"},
+ },
+ },
+
+ { "_ymkthj",
+ {20, 9, 4},
+ { {"000"_b, "uzp2_p_pp"},
+ },
+ },
+
+ { "_ympyng",
+ {30, 23, 22, 13},
+ { {"0000"_b, "ld1sh_z_p_br_s64"},
+ {"0001"_b, "ldff1sh_z_p_br_s64"},
+ {"0010"_b, "ld1w_z_p_br_u32"},
+ {"0011"_b, "ldff1w_z_p_br_u32"},
+ {"0100"_b, "ld1sb_z_p_br_s64"},
+ {"0101"_b, "ldff1sb_z_p_br_s64"},
+ {"0110"_b, "ld1sb_z_p_br_s16"},
+ {"0111"_b, "ldff1sb_z_p_br_s16"},
+ {"1001"_b, "stnt1w_z_p_br_contiguous"},
+ {"1011"_b, "st3w_z_p_br_contiguous"},
+ {"10x0"_b, "st1w_z_p_br"},
+ {"1100"_b, "str_z_bi"},
+ {"1101"_b, "stnt1d_z_p_br_contiguous"},
+ {"1111"_b, "st3d_z_p_br_contiguous"},
+ },
+ },
+
+ { "_ymznlj",
+ {13, 10},
+ { {"00"_b, "_vgrtjz"},
+ {"01"_b, "_kxjgsz"},
+ {"10"_b, "_vmjtrx"},
+ {"11"_b, "_tgmljr"},
+ },
+ },
+
+ { "_ynnrny",
+ {18, 17},
+ { {"00"_b, "_jplmmr"},
+ },
+ },
+
+ { "_ynqsgl",
+ {17},
+ { {"0"_b, "ld4_asisdlso_h4_4h"},
+ },
+ },
+
+ { "_ypjyqh",
+ {9, 8, 7, 6, 5, 0},
+ { {"111110"_b, "drps_64e_branch_reg"},
+ },
+ },
+
+ { "_yplktv",
+ {13, 12, 11, 10},
+ { {"0001"_b, "sub_asisdsame_only"},
+ {"0010"_b, "_llxlqz"},
+ {"0011"_b, "cmeq_asisdsame_only"},
+ {"0110"_b, "_pxkqxn"},
+ {"1010"_b, "_rhvksm"},
+ {"1101"_b, "sqrdmulh_asisdsame_only"},
+ {"1110"_b, "_gkkpjz"},
+ },
+ },
+
+ { "_yppszx",
+ {23, 22, 10},
+ { {"100"_b, "umlslb_z_zzzi_s"},
+ {"101"_b, "umlslt_z_zzzi_s"},
+ {"110"_b, "umlslb_z_zzzi_d"},
+ {"111"_b, "umlslt_z_zzzi_d"},
+ },
+ },
+
+ { "_yppyky",
+ {30, 13},
+ { {"00"_b, "_gyrjrm"},
+ {"01"_b, "_hhkqtn"},
+ {"10"_b, "_jgmlpk"},
+ {"11"_b, "_tzzssm"},
+ },
+ },
+
+ { "_ypqgyp",
{22},
- { {"0", "UnallocNEONStoreSingleStructPostIndex"},
- {"1", "UnallocNEONLoadSingleStructPostIndex"},
+ { {"0"_b, "ldrsw_64_ldst_regoff"},
},
},
- { "UnallocNEONModifiedImmediate",
- {30, 29, 15, 14, 13, 12, 11},
- { { "x00xxx1", "VisitUnallocated"},
- { "x010xx1", "VisitUnallocated"},
- { "x0110x1", "VisitUnallocated"},
- { "x011101", "VisitUnallocated"},
- { "0111110", "VisitUnallocated"},
- { "x1xxxx1", "VisitUnallocated"},
- { "otherwise", "VisitNEONModifiedImmediate"},
+ { "_ypznsm",
+ {23},
+ { {"0"_b, "fmaxnm_asimdsame_only"},
+ {"1"_b, "fminnm_asimdsame_only"},
+ },
+ },
+
+ { "_yqmqzp",
+ {18, 17, 12},
+ { {"000"_b, "st1_asisdlso_d1_1d"},
+ },
+ },
+
+ { "_yqmvxk",
+ {11, 10, 9, 8, 7, 6},
+ { {"000001"_b, "tcommit_only_barriers"},
+ {"xx1000"_b, "dsb_bon_barriers"},
+ {"xxxx10"_b, "dmb_bo_barriers"},
+ {"xxxx11"_b, "sb_only_barriers"},
+ },
+ },
+
+ { "_yqsgrt",
+ {23, 22, 20, 19, 16, 13, 12},
+ { {"0000000"_b, "_znmhps"},
+ {"0000010"_b, "_zssjpv"},
+ {"0000011"_b, "_smqvrs"},
+ {"0100000"_b, "_jrgzxt"},
+ {"0100010"_b, "_ppllxt"},
+ {"0100011"_b, "_hqlskj"},
+ {"100xx00"_b, "st3_asisdlsep_r3_r"},
+ {"100xx10"_b, "st1_asisdlsep_r3_r3"},
+ {"100xx11"_b, "st1_asisdlsep_r1_r1"},
+ {"1010x00"_b, "st3_asisdlsep_r3_r"},
+ {"1010x10"_b, "st1_asisdlsep_r3_r3"},
+ {"1010x11"_b, "st1_asisdlsep_r1_r1"},
+ {"1011000"_b, "st3_asisdlsep_r3_r"},
+ {"1011010"_b, "st1_asisdlsep_r3_r3"},
+ {"1011011"_b, "st1_asisdlsep_r1_r1"},
+ {"1011100"_b, "_ngxkmp"},
+ {"1011110"_b, "_qgryzh"},
+ {"1011111"_b, "_tjltls"},
+ {"110xx00"_b, "ld3_asisdlsep_r3_r"},
+ {"110xx10"_b, "ld1_asisdlsep_r3_r3"},
+ {"110xx11"_b, "ld1_asisdlsep_r1_r1"},
+ {"1110x00"_b, "ld3_asisdlsep_r3_r"},
+ {"1110x10"_b, "ld1_asisdlsep_r3_r3"},
+ {"1110x11"_b, "ld1_asisdlsep_r1_r1"},
+ {"1111000"_b, "ld3_asisdlsep_r3_r"},
+ {"1111010"_b, "ld1_asisdlsep_r3_r3"},
+ {"1111011"_b, "ld1_asisdlsep_r1_r1"},
+ {"1111100"_b, "_zzgrjz"},
+ {"1111110"_b, "_phtnny"},
+ {"1111111"_b, "_txjyxr"},
+ },
+ },
+
+ { "_yqvqtx",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld1rob_z_p_bi_u8"},
+ {"000x0"_b, "ld1rob_z_p_br_contiguous"},
+ {"01001"_b, "ld1roh_z_p_bi_u16"},
+ {"010x0"_b, "ld1roh_z_p_br_contiguous"},
+ },
+ },
+
+ { "_yqxnzl",
+ {11, 10},
+ { {"00"_b, "sqdmulh_z_zz"},
+ {"01"_b, "sqrdmulh_z_zz"},
},
},
- { "UnallocNEONPerm",
+ { "_yrgnqz",
{13, 12},
- { {"00", "VisitUnallocated"},
- {"otherwise", "VisitNEONPerm"},
- },
- },
-
- { "UnallocNEONScalar2RegMisc",
- {16, 15, 14, 13, 12, 23, 22, 29},
- { {"0000xxxx", "VisitUnallocated"},
- {"00010xxx", "VisitUnallocated"},
- {"0010xxxx", "VisitUnallocated"},
- {"00110xxx", "VisitUnallocated"},
- {"01010xx1", "VisitUnallocated"},
- {"011xx0xx", "VisitUnallocated"},
- {"011101x1", "VisitUnallocated"},
- {"01111xxx", "VisitUnallocated"},
- {"1000xxxx", "VisitUnallocated"},
- {"10010xx0", "VisitUnallocated"},
- {"10011xxx", "VisitUnallocated"},
- {"10101xxx", "VisitUnallocated"},
- {"101100x0", "VisitUnallocated"},
- {"101101xx", "VisitUnallocated"},
- {"10111xxx", "VisitUnallocated"},
- {"1100xxxx", "VisitUnallocated"},
- {"111001xx", "VisitUnallocated"},
- {"11110xxx", "VisitUnallocated"},
- {"111110xx", "VisitUnallocated"},
- {"111111x1", "VisitUnallocated"},
- {"otherwise", "VisitNEONScalar2RegMisc"},
- },
- },
-
- { "UnallocNEONScalar2RegMiscFP16",
- {29, 23, 22, 16, 15, 14, 13, 12},
- { {"xx0xxxxx", "VisitUnallocated"},
- {"xx100xxx", "VisitUnallocated"},
- {"xx1010xx", "VisitUnallocated"},
- {"xx110xxx", "VisitUnallocated"},
- {"xx11100x", "VisitUnallocated"},
- {"xx111110", "VisitUnallocated"},
- {"x01011xx", "VisitUnallocated"},
- {"x0111111", "VisitUnallocated"},
- {"x1101111", "VisitUnallocated"},
- {"x1111100", "VisitUnallocated"},
- {"11101110", "VisitUnallocated"},
- {"11111111", "VisitUnallocated"},
- {"otherwise", "VisitNEONScalar2RegMiscFP16"},
- },
- },
-
- { "UnallocNEONScalar3Diff",
- {14, 13, 12, 29},
- { {"000x", "VisitUnallocated"},
- {"0011", "VisitUnallocated"},
- {"010x", "VisitUnallocated"},
- {"0111", "VisitUnallocated"},
- {"100x", "VisitUnallocated"},
- {"1011", "VisitUnallocated"},
- {"11xx", "VisitUnallocated"},
- {"otherwise", "UnallocNEONScalar3Diff_2"},
- },
- },
-
- { "UnallocNEONScalar3Diff_2",
- {15},
- { {"0", "VisitUnallocated"},
- {"1", "VisitNEONScalar3Diff"},
- },
- },
-
- { "UnallocNEONScalar3Same",
- {15, 14, 13, 12, 11, 23, 22, 29},
- { {"00000xxx", "VisitUnallocated"},
- {"0001xxxx", "VisitUnallocated"},
- {"00100xxx", "VisitUnallocated"},
- {"011xxxxx", "VisitUnallocated"},
- {"1001xxxx", "VisitUnallocated"},
- {"1010xxxx", "VisitUnallocated"},
- {"10111xxx", "VisitUnallocated"},
- {"1100xxxx", "VisitUnallocated"},
- {"110100xx", "VisitUnallocated"},
- {"110101x0", "VisitUnallocated"},
- {"110110x1", "VisitUnallocated"},
- {"110111xx", "VisitUnallocated"},
- {"111001x0", "VisitUnallocated"},
- {"111010x0", "VisitUnallocated"},
- {"111011x0", "VisitUnallocated"},
- {"11110xxx", "VisitUnallocated"},
- {"111110x1", "VisitUnallocated"},
- {"111111x1", "VisitUnallocated"},
- {"otherwise", "VisitNEONScalar3Same"},
- },
- },
-
- { "UnallocNEONScalar3SameExtra",
- {29, 14, 13, 12, 11},
- { {"x001x", "VisitUnallocated"},
- {"x01xx", "VisitUnallocated"},
- {"x1xxx", "VisitUnallocated"},
- {"00000", "VisitUnallocated"},
- {"00001", "VisitUnallocated"},
- {"otherwise", "VisitNEONScalar3SameExtra"},
- },
- },
-
- { "UnallocNEONScalar3SameFP16",
- {29, 23, 13, 12, 11},
- { {"00011", "VisitNEONScalar3SameFP16"},
- {"00100", "VisitNEONScalar3SameFP16"},
- {"00111", "VisitNEONScalar3SameFP16"},
- {"01111", "VisitNEONScalar3SameFP16"},
- {"10100", "VisitNEONScalar3SameFP16"},
- {"10101", "VisitNEONScalar3SameFP16"},
- {"11010", "VisitNEONScalar3SameFP16"},
- {"11100", "VisitNEONScalar3SameFP16"},
- {"11101", "VisitNEONScalar3SameFP16"},
- {"otherwise", "VisitUnallocated"},
- },
- },
-
- { "UnallocNEONScalarByIndexedElement",
- {29, 23, 22, 15, 14, 13, 12},
- { {"0xx1111", "VisitUnallocated"},
- {"1000001", "VisitUnallocated"},
- {"1000101", "VisitUnallocated"},
- {"11x0001", "VisitUnallocated"},
- {"11x0101", "VisitUnallocated"},
- {"1xx0011", "VisitUnallocated"},
- {"1xx0111", "VisitUnallocated"},
- {"1xx1011", "VisitUnallocated"},
- {"1xx1100", "VisitUnallocated"},
- {"x010001", "VisitUnallocated"},
- {"x010101", "VisitUnallocated"},
- {"x011001", "VisitUnallocated"},
- {"xxx0000", "VisitUnallocated"},
- {"xxx0010", "VisitUnallocated"},
- {"xxx0100", "VisitUnallocated"},
- {"xxx0110", "VisitUnallocated"},
- {"xxx1000", "VisitUnallocated"},
- {"xxx1010", "VisitUnallocated"},
- {"xxx1110", "VisitUnallocated"},
- {"otherwise", "VisitNEONScalarByIndexedElement"},
- },
- },
-
- { "UnallocNEONScalarCopy",
- {14, 13, 12, 11},
- { {"0000", "UnallocNEONScalarCopy_2"},
- {"otherwise", "VisitUnallocated"},
- },
- },
-
- { "UnallocNEONScalarCopy_2",
- {19, 18, 17, 16},
- { {"0000", "VisitUnallocated"},
- {"otherwise", "UnallocNEONScalarCopy_3"},
- },
- },
-
- { "UnallocNEONScalarCopy_3",
- {29},
- { {"0", "VisitNEONScalarCopy"},
- {"1", "VisitUnallocated"},
- },
- },
-
- { "UnallocNEONScalarPairwise",
- {29, 23, 22, 16, 15, 14, 13, 12},
- { {"xxx00xxx", "VisitUnallocated"},
- {"xxx010xx", "VisitUnallocated"},
- {"xxx01110", "VisitUnallocated"},
- {"xxx10xxx", "VisitUnallocated"},
- {"xxx1100x", "VisitUnallocated"},
- {"xxx11010", "VisitUnallocated"},
- {"xxx111xx", "VisitUnallocated"},
- {"x1x01101", "VisitUnallocated"},
- {"00101100", "VisitUnallocated"},
- {"00101101", "VisitUnallocated"},
- {"00101111", "VisitUnallocated"},
- {"01101100", "VisitUnallocated"},
- {"01101111", "VisitUnallocated"},
- {"1xx11011", "VisitUnallocated"},
- {"otherwise", "VisitNEONScalarPairwise"},
- },
- },
-
- { "UnallocNEONScalarShiftImmediate",
- {15, 14, 13, 12, 11, 29},
- { {"00001x", "VisitUnallocated"},
- {"00011x", "VisitUnallocated"},
- {"00101x", "VisitUnallocated"},
- {"00111x", "VisitUnallocated"},
- {"010000", "VisitUnallocated"},
- {"01001x", "VisitUnallocated"},
- {"01011x", "VisitUnallocated"},
- {"011000", "VisitUnallocated"},
- {"01101x", "VisitUnallocated"},
- {"01111x", "VisitUnallocated"},
- {"100000", "VisitUnallocated"},
- {"100010", "VisitUnallocated"},
- {"101xxx", "VisitUnallocated"},
- {"110xxx", "VisitUnallocated"},
- {"11101x", "VisitUnallocated"},
- {"11110x", "VisitUnallocated"},
- {"otherwise", "UnallocNEONScalarShiftImmediate_2"},
- },
- },
-
- { "UnallocNEONScalarShiftImmediate_2",
- {22, 21, 20, 19},
- { {"0000", "VisitUnallocated"},
- {"otherwise", "VisitNEONScalarShiftImmediate"},
- },
- },
-
- { "UnallocNEONShiftImmediate",
- {15, 14, 13, 12, 11, 29},
- { {"00001x", "VisitUnallocated"},
- {"00011x", "VisitUnallocated"},
- {"00101x", "VisitUnallocated"},
- {"00111x", "VisitUnallocated"},
- {"010000", "VisitUnallocated"},
- {"01001x", "VisitUnallocated"},
- {"01011x", "VisitUnallocated"},
- {"011000", "VisitUnallocated"},
- {"01101x", "VisitUnallocated"},
- {"01111x", "VisitUnallocated"},
- {"10101x", "VisitUnallocated"},
- {"1011xx", "VisitUnallocated"},
- {"110xxx", "VisitUnallocated"},
- {"11101x", "VisitUnallocated"},
- {"11110x", "VisitUnallocated"},
- {"otherwise", "VisitNEONShiftImmediate"},
- },
- },
-
- { "UnallocNEONStoreSingleStruct",
- {21, 15, 14, 13, 12, 11, 10},
- { {"0010xx1", "VisitUnallocated"},
- {"0011xx1", "VisitUnallocated"},
- {"0100101", "VisitUnallocated"},
- {"0100x1x", "VisitUnallocated"},
- {"0101011", "VisitUnallocated"},
- {"01011x1", "VisitUnallocated"},
- {"0101x10", "VisitUnallocated"},
- {"1010xx1", "VisitUnallocated"},
- {"1011xx1", "VisitUnallocated"},
- {"1100011", "VisitUnallocated"},
- {"11001x1", "VisitUnallocated"},
- {"1100x10", "VisitUnallocated"},
- {"1101011", "VisitUnallocated"},
- {"11011x1", "VisitUnallocated"},
- {"1101x10", "VisitUnallocated"},
- {"x11xxxx", "VisitUnallocated"},
- {"otherwise", "VisitNEONLoadStoreSingleStruct"},
- },
- },
-
- { "UnallocNEONStoreSingleStructPostIndex",
- {21, 15, 14, 13, 12, 11, 10},
- { {"0010xx1", "VisitUnallocated"},
- {"0011xx1", "VisitUnallocated"},
- {"0100101", "VisitUnallocated"},
- {"0100x1x", "VisitUnallocated"},
- {"0101011", "VisitUnallocated"},
- {"01011x1", "VisitUnallocated"},
- {"0101x10", "VisitUnallocated"},
- {"1010xx1", "VisitUnallocated"},
- {"1011xx1", "VisitUnallocated"},
- {"1100011", "VisitUnallocated"},
- {"11001x1", "VisitUnallocated"},
- {"1100x10", "VisitUnallocated"},
- {"1101011", "VisitUnallocated"},
- {"11011x1", "VisitUnallocated"},
- {"1101x10", "VisitUnallocated"},
- {"x11xxxx", "VisitUnallocated"},
- {"otherwise", "VisitNEONLoadStoreSingleStructPostIndex"},
- },
- },
-
- { "UnallocNEONTable",
+ { {"00"_b, "sshl_asisdsame_only"},
+ {"01"_b, "srshl_asisdsame_only"},
+ },
+ },
+
+ { "_yrlzqp",
+ {22, 13, 12},
+ { {"000"_b, "ldapr_64l_memop"},
+ },
+ },
+
+ { "_yrmmmg",
+ {4},
+ { {"0"_b, "cmphs_p_p_zi"},
+ {"1"_b, "cmphi_p_p_zi"},
+ },
+ },
+
+ { "_yrrppk",
+ {20, 19, 18, 17, 16},
+ { {"00000"_b, "fcvtns_32d_float2int"},
+ {"00001"_b, "fcvtnu_32d_float2int"},
+ {"00010"_b, "scvtf_d32_float2int"},
+ {"00011"_b, "ucvtf_d32_float2int"},
+ {"00100"_b, "fcvtas_32d_float2int"},
+ {"00101"_b, "fcvtau_32d_float2int"},
+ {"01000"_b, "fcvtps_32d_float2int"},
+ {"01001"_b, "fcvtpu_32d_float2int"},
+ {"10000"_b, "fcvtms_32d_float2int"},
+ {"10001"_b, "fcvtmu_32d_float2int"},
+ {"11000"_b, "fcvtzs_32d_float2int"},
+ {"11001"_b, "fcvtzu_32d_float2int"},
+ {"11110"_b, "fjcvtzs_32d_float2int"},
+ },
+ },
+
+ { "_ysjqhn",
+ {30, 23, 22},
+ { {"00x"_b, "adds_64_addsub_shift"},
+ {"010"_b, "adds_64_addsub_shift"},
+ {"10x"_b, "subs_64_addsub_shift"},
+ {"110"_b, "subs_64_addsub_shift"},
+ },
+ },
+
+ { "_yskkjs",
+ {13, 12},
+ { {"00"_b, "sbcs_64_addsub_carry"},
+ },
+ },
+
+ { "_yszjsm",
+ {12, 11, 10},
+ { {"000"_b, "sdot_z_zzz"},
+ {"001"_b, "udot_z_zzz"},
+ {"010"_b, "sqdmlalbt_z_zzz"},
+ {"011"_b, "sqdmlslbt_z_zzz"},
+ {"1xx"_b, "cdot_z_zzz"},
+ },
+ },
+
+ { "_ytkjxx",
+ {30, 23, 22, 13, 4},
+ { {"00x0x"_b, "ld1w_z_p_bz_s_x32_scaled"},
+ {"00x1x"_b, "ldff1w_z_p_bz_s_x32_scaled"},
+ {"0100x"_b, "ldr_z_bi"},
+ {"01100"_b, "prfw_i_p_bi_s"},
+ {"01110"_b, "prfd_i_p_bi_s"},
+ {"10x0x"_b, "ld1w_z_p_bz_d_x32_scaled"},
+ {"10x1x"_b, "ldff1w_z_p_bz_d_x32_scaled"},
+ {"11x0x"_b, "ld1d_z_p_bz_d_x32_scaled"},
+ {"11x1x"_b, "ldff1d_z_p_bz_d_x32_scaled"},
+ },
+ },
+
+ { "_ytsghm",
+ {30, 23, 22},
+ { {"000"_b, "msub_32a_dp_3src"},
+ },
+ },
+
+ { "_ytvtqn",
+ {30, 23, 22, 20, 13},
+ { {"00001"_b, "ld1sh_z_p_bi_s64"},
+ {"00011"_b, "ldnf1sh_z_p_bi_s64"},
+ {"00101"_b, "ld1w_z_p_bi_u32"},
+ {"00111"_b, "ldnf1w_z_p_bi_u32"},
+ {"01001"_b, "ld1sb_z_p_bi_s64"},
+ {"01011"_b, "ldnf1sb_z_p_bi_s64"},
+ {"01101"_b, "ld1sb_z_p_bi_s16"},
+ {"01111"_b, "ldnf1sb_z_p_bi_s16"},
+ {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"},
+ {"100x1"_b, "st1w_z_p_bz_d_64_unscaled"},
+ {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"},
+ {"101x1"_b, "st1w_z_p_ai_d"},
+ {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"},
+ {"110x1"_b, "st1d_z_p_bz_d_64_unscaled"},
+ {"111x1"_b, "st1d_z_p_ai_d"},
+ },
+ },
+
+ { "_ytvxsl",
+ {30, 23, 22},
+ { {"000"_b, "stlxrb_sr32_ldstexcl"},
+ {"001"_b, "ldaxrb_lr32_ldstexcl"},
+ {"010"_b, "stlrb_sl32_ldstexcl"},
+ {"011"_b, "ldarb_lr32_ldstexcl"},
+ {"100"_b, "stlxrh_sr32_ldstexcl"},
+ {"101"_b, "ldaxrh_lr32_ldstexcl"},
+ {"110"_b, "stlrh_sl32_ldstexcl"},
+ {"111"_b, "ldarh_lr32_ldstexcl"},
+ },
+ },
+
+ { "_yvgqjx",
+ {13, 12, 5},
+ { {"010"_b, "_tnzytv"},
+ {"011"_b, "_vmpnlv"},
+ {"100"_b, "_hhhqjk"},
+ {"101"_b, "_tkzqqp"},
+ {"110"_b, "_sphpkr"},
+ {"111"_b, "_spglxn"},
+ },
+ },
+
+ { "_yvhnlk",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0001111"_b, "casp_cp32_ldstexcl"},
+ {"0011111"_b, "caspa_cp32_ldstexcl"},
+ {"0101111"_b, "casb_c32_ldstexcl"},
+ {"0111111"_b, "casab_c32_ldstexcl"},
+ {"1001111"_b, "casp_cp64_ldstexcl"},
+ {"1011111"_b, "caspa_cp64_ldstexcl"},
+ {"1101111"_b, "cash_c32_ldstexcl"},
+ {"1111111"_b, "casah_c32_ldstexcl"},
+ },
+ },
+
+ { "_yvlhjg",
+ {23},
+ { {"0"_b, "frecps_asimdsame_only"},
+ {"1"_b, "frsqrts_asimdsame_only"},
+ },
+ },
+
+ { "_yvnjkr",
+ {9, 8, 7, 6, 5},
+ { {"11111"_b, "autdzb_64z_dp_1src"},
+ },
+ },
+
+ { "_yvptvx",
+ {23, 12, 11, 10},
+ { {"0000"_b, "sqshrnb_z_zi"},
+ {"0001"_b, "sqshrnt_z_zi"},
+ {"0010"_b, "sqrshrnb_z_zi"},
+ {"0011"_b, "sqrshrnt_z_zi"},
+ {"0100"_b, "uqshrnb_z_zi"},
+ {"0101"_b, "uqshrnt_z_zi"},
+ {"0110"_b, "uqrshrnb_z_zi"},
+ {"0111"_b, "uqrshrnt_z_zi"},
+ },
+ },
+
+ { "_yvxgrr",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0111001"_b, "frintm_asimdmiscfp16_r"},
+ {"0x00001"_b, "frintm_asimdmisc_r"},
+ {"1111001"_b, "frintz_asimdmiscfp16_r"},
+ {"1x00001"_b, "frintz_asimdmisc_r"},
+ {"xx00000"_b, "cmeq_asimdmisc_z"},
+ },
+ },
+
+ { "_yvygml",
+ {30},
+ { {"0"_b, "_jkrlsg"},
+ {"1"_b, "_vvrmvg"},
+ },
+ },
+
+ { "_yvyhlh",
+ {23, 22, 12, 11, 10},
+ { {"0x000"_b, "fmul_z_zzi_h"},
+ {"10000"_b, "fmul_z_zzi_s"},
+ {"11000"_b, "fmul_z_zzi_d"},
+ },
+ },
+
+ { "_yvyxkx",
+ {10},
+ { {"0"_b, "sha512su0_vv2_cryptosha512_2"},
+ {"1"_b, "sm4e_vv4_cryptosha512_2"},
+ },
+ },
+
+ { "_yxhrpk",
{23, 22},
- { {"00", "VisitNEONTable"},
- {"otherwise", "VisitUnallocated"},
+ { {"00"_b, "fmlal2_asimdsame_f"},
+ {"10"_b, "fmlsl2_asimdsame_f"},
},
},
- { "UnallocRotateRightIntoFlags",
- {31, 30, 29, 4},
- { {"1010", "VisitRotateRightIntoFlags"},
- {"otherwise", "VisitUnallocated"},
+ { "_yxmkzr",
+ {12},
+ { {"0"_b, "st1_asisdlsop_dx1_r1d"},
},
},
- { "UnallocSystem",
- {21, 20, 19, 15, 14, 13, 12},
- { {"0000101", "VisitUnallocated"},
- {"000011x", "VisitUnallocated"},
- {"0001xxx", "VisitUnallocated"},
- {"100xxxx", "VisitUnallocated"},
- {"otherwise", "UnallocSystem_2"},
+ { "_yxnslx",
+ {23, 22},
+ { {"00"_b, "adr_z_az_d_s32_scaled"},
+ {"01"_b, "adr_z_az_d_u32_scaled"},
+ {"1x"_b, "adr_z_az_sd_same_scaled"},
+ },
+ },
+
+ { "_yykhjv",
+ {23, 22, 13, 12, 11, 10},
+ { {"000110"_b, "smmla_z_zzz"},
+ {"0x1000"_b, "sshllb_z_zi"},
+ {"0x1001"_b, "sshllt_z_zi"},
+ {"0x1010"_b, "ushllb_z_zi"},
+ {"0x1011"_b, "ushllt_z_zi"},
+ {"100110"_b, "usmmla_z_zzz"},
+ {"110110"_b, "ummla_z_zzz"},
+ {"xx0000"_b, "saddlbt_z_zz"},
+ {"xx0010"_b, "ssublbt_z_zz"},
+ {"xx0011"_b, "ssubltb_z_zz"},
+ {"xx0100"_b, "eorbt_z_zz"},
+ {"xx0101"_b, "eortb_z_zz"},
+ {"xx1100"_b, "bext_z_zz"},
+ {"xx1101"_b, "bdep_z_zz"},
+ {"xx1110"_b, "bgrp_z_zz"},
},
},
- { "UnallocSystem_2",
- {21, 20, 19, 15, 14, 13},
- { {"000000", "VisitUnallocated"},
- {"otherwise", "UnallocSystem_3"},
+ { "_yynmjl",
+ {4},
+ { {"0"_b, "orrs_p_p_pp_z"},
+ {"1"_b, "orns_p_p_pp_z"},
},
},
- { "UnallocSystem_3",
- {21, 20, 19, 16, 15, 14, 13},
- { {"0000001", "VisitUnallocated"},
- {"otherwise", "UnallocSystem_4"},
+ { "_yyrkmn",
+ {17, 16, 9, 8, 7, 6, 5},
+ { {"0000000"_b, "aesmc_z_z"},
+ {"10xxxxx"_b, "aese_z_zz"},
+ {"11xxxxx"_b, "sm4e_z_zz"},
},
},
- { "UnallocSystem_4",
- {21, 20, 19, 17, 15, 14, 13},
- { {"0000001", "VisitUnallocated"},
- {"otherwise", "UnallocSystem_5"},
+ { "_yytvxh",
+ {30, 23, 22, 13, 4},
+ { {"00000"_b, "prfw_i_p_br_s"},
+ {"00010"_b, "prfw_i_p_ai_s"},
+ {"0010x"_b, "ld1rw_z_p_bi_u32"},
+ {"0011x"_b, "ld1rw_z_p_bi_u64"},
+ {"01000"_b, "prfd_i_p_br_s"},
+ {"01010"_b, "prfd_i_p_ai_s"},
+ {"0110x"_b, "ld1rsb_z_p_bi_s16"},
+ {"0111x"_b, "ld1rd_z_p_bi_u64"},
+ {"1000x"_b, "ldnt1w_z_p_ar_d_64_unscaled"},
+ {"10010"_b, "prfw_i_p_ai_d"},
+ {"1010x"_b, "ld1w_z_p_bz_d_64_unscaled"},
+ {"1011x"_b, "ldff1w_z_p_bz_d_64_unscaled"},
+ {"1100x"_b, "ldnt1d_z_p_ar_d_64_unscaled"},
+ {"11010"_b, "prfd_i_p_ai_d"},
+ {"1110x"_b, "ld1d_z_p_bz_d_64_unscaled"},
+ {"1111x"_b, "ldff1d_z_p_bz_d_64_unscaled"},
},
},
- { "UnallocSystem_5",
- {21, 20, 19, 18, 15, 14, 13},
- { {"0001001", "VisitUnallocated"},
- {"otherwise", "VisitSystem"},
+ { "_yyyshx",
+ {30, 13, 4},
+ { {"000"_b, "cmphs_p_p_zz"},
+ {"001"_b, "cmphi_p_p_zz"},
+ {"010"_b, "cmpeq_p_p_zw"},
+ {"011"_b, "cmpne_p_p_zw"},
+ {"1xx"_b, "fcmla_z_p_zzz"},
},
},
- { "UnallocUnconditionalBranchToRegister",
- {15, 14, 13, 12},
- { {"0000", "UnallocUnconditionalBranchToRegister_2"},
- {"otherwise", "VisitUnallocated"},
+ { "_yzmjhn",
+ {4},
+ { {"0"_b, "eors_p_p_pp_z"},
},
},
- { "UnallocUnconditionalBranchToRegister_2",
+ { "_yzqtyl",
{20, 19, 18, 17, 16},
- { {"11111", "UnallocUnconditionalBranchToRegister_3"},
- {"otherwise", "VisitUnallocated"},
+ { {"00001"_b, "sqxtun_asisdmisc_n"},
},
},
- { "UnallocUnconditionalBranchToRegister_3",
- {24, 23, 22, 21},
- { {"0011", "VisitUnallocated"},
- {"011x", "VisitUnallocated"},
- {"otherwise", "VisitUnconditionalBranchToRegister"},
+ { "_yzzlxs",
+ {23, 4},
+ { {"00"_b, "_mpgrgp"},
},
},
- { "DecodeSVE101xxxxx",
- {15, 14, 13},
- { {"101", "DecodeSVE101xx101"},
- {"010", "VisitSVEContiguousLoad_ScalarPlusScalar"},
- {"otherwise", "VisitSVEMemContiguousLoad"},
+ { "_zgjpym",
+ {23, 22, 20, 19, 11},
+ { {"00010"_b, "srsra_asisdshf_r"},
+ {"001x0"_b, "srsra_asisdshf_r"},
+ {"01xx0"_b, "srsra_asisdshf_r"},
},
},
- { "DecodeSVE101xx101",
- {20},
- { {"0", "VisitSVEContiguousLoad_ScalarPlusImm"},
- {"1", "VisitSVEMemContiguousLoad"},
+ { "_zglksl",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1101001"_b, "ummla_asimdsame2_g"},
+ {"xxx0001"_b, "sqrdmlah_asimdsame2_only"},
+ {"xxx0011"_b, "sqrdmlsh_asimdsame2_only"},
+ {"xxx0101"_b, "udot_asimdsame2_d"},
},
},
- { "DecodeSVE00000001",
- {20, 19},
- { {"10", "VisitSVEMovprfx"},
- {"otherwise", "VisitSVEIntReduction"},
+ { "_zgysvr",
+ {30, 13},
+ { {"00"_b, "_xpqglq"},
+ {"10"_b, "_xstkrn"},
+ {"11"_b, "_zjzmvh"},
+ },
+ },
+
+ { "_zgzlhq",
+ {17},
+ { {"0"_b, "ld1_asisdlso_b1_1b"},
+ },
+ },
+
+ { "_zhkjzg",
+ {23, 22, 13},
+ { {"000"_b, "fmls_asimdelem_rh_h"},
+ {"1x0"_b, "fmls_asimdelem_r_sd"},
+ {"xx1"_b, "sqdmlsl_asimdelem_l"},
+ },
+ },
+
+ { "_zhpxqz",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_h_floatimm"},
+ },
+ },
+
+ { "_zhrtts",
+ {23, 22},
+ { {"00"_b, "_qlqhzg"},
+ },
+ },
+
+ { "_zjgvyp",
+ {30, 13, 12, 11, 10},
+ { {"00000"_b, "_ghnljt"},
+ },
+ },
+
+ { "_zjjxjl",
+ {9},
+ { {"0"_b, "pnext_p_p_p"},
+ },
+ },
+
+ { "_zjsgkm",
+ {4},
+ { {"0"_b, "ccmn_64_condcmp_reg"},
+ },
+ },
+
+ { "_zjslnr",
+ {30, 23, 22},
+ { {"000"_b, "sbfm_32m_bitfield"},
+ {"010"_b, "extr_32_extract"},
+ {"100"_b, "ubfm_32m_bitfield"},
+ },
+ },
+
+ { "_zjzmvh",
+ {23, 22, 20, 19, 18, 17, 16},
+ { {"0001010"_b, "fcvtx_z_p_z_d2s"},
+ {"0011xx0"_b, "flogb_z_p_z"},
+ {"0110010"_b, "scvtf_z_p_z_h2fp16"},
+ {"0110011"_b, "ucvtf_z_p_z_h2fp16"},
+ {"0110100"_b, "scvtf_z_p_z_w2fp16"},
+ {"0110101"_b, "ucvtf_z_p_z_w2fp16"},
+ {"0110110"_b, "scvtf_z_p_z_x2fp16"},
+ {"0110111"_b, "ucvtf_z_p_z_x2fp16"},
+ {"0111010"_b, "fcvtzs_z_p_z_fp162h"},
+ {"0111011"_b, "fcvtzu_z_p_z_fp162h"},
+ {"0111100"_b, "fcvtzs_z_p_z_fp162w"},
+ {"0111101"_b, "fcvtzu_z_p_z_fp162w"},
+ {"0111110"_b, "fcvtzs_z_p_z_fp162x"},
+ {"0111111"_b, "fcvtzu_z_p_z_fp162x"},
+ {"1001000"_b, "fcvt_z_p_z_s2h"},
+ {"1001001"_b, "fcvt_z_p_z_h2s"},
+ {"1001010"_b, "bfcvt_z_p_z_s2bf"},
+ {"1010100"_b, "scvtf_z_p_z_w2s"},
+ {"1010101"_b, "ucvtf_z_p_z_w2s"},
+ {"1011100"_b, "fcvtzs_z_p_z_s2w"},
+ {"1011101"_b, "fcvtzu_z_p_z_s2w"},
+ {"1101000"_b, "fcvt_z_p_z_d2h"},
+ {"1101001"_b, "fcvt_z_p_z_h2d"},
+ {"1101010"_b, "fcvt_z_p_z_d2s"},
+ {"1101011"_b, "fcvt_z_p_z_s2d"},
+ {"1110000"_b, "scvtf_z_p_z_w2d"},
+ {"1110001"_b, "ucvtf_z_p_z_w2d"},
+ {"1110100"_b, "scvtf_z_p_z_x2s"},
+ {"1110101"_b, "ucvtf_z_p_z_x2s"},
+ {"1110110"_b, "scvtf_z_p_z_x2d"},
+ {"1110111"_b, "ucvtf_z_p_z_x2d"},
+ {"1111000"_b, "fcvtzs_z_p_z_d2w"},
+ {"1111001"_b, "fcvtzu_z_p_z_d2w"},
+ {"1111100"_b, "fcvtzs_z_p_z_s2x"},
+ {"1111101"_b, "fcvtzu_z_p_z_s2x"},
+ {"1111110"_b, "fcvtzs_z_p_z_d2x"},
+ {"1111111"_b, "fcvtzu_z_p_z_d2x"},
+ {"xx00000"_b, "frintn_z_p_z"},
+ {"xx00001"_b, "frintp_z_p_z"},
+ {"xx00010"_b, "frintm_z_p_z"},
+ {"xx00011"_b, "frintz_z_p_z"},
+ {"xx00100"_b, "frinta_z_p_z"},
+ {"xx00110"_b, "frintx_z_p_z"},
+ {"xx00111"_b, "frinti_z_p_z"},
+ {"xx01100"_b, "frecpx_z_p_z"},
+ {"xx01101"_b, "fsqrt_z_p_z"},
+ },
+ },
+
+ { "_zkhjsp",
+ {11},
+ { {"0"_b, "sqdmulh_z_zzi_h"},
+ {"1"_b, "mul_z_zzi_h"},
+ },
+ },
+
+ { "_zkqtrj",
+ {30},
+ { {"0"_b, "b_only_branch_imm"},
+ },
+ },
+
+ { "_zkttzl",
+ {23, 22, 20, 19, 18, 16, 13},
+ { {"0000000"_b, "_tsvsgh"},
+ {"0000001"_b, "_rkrltp"},
+ {"0100000"_b, "_zgzlhq"},
+ {"0100001"_b, "_nrssjz"},
+ {"100xxx0"_b, "st1_asisdlsop_bx1_r1b"},
+ {"100xxx1"_b, "st3_asisdlsop_bx3_r3b"},
+ {"1010xx0"_b, "st1_asisdlsop_bx1_r1b"},
+ {"1010xx1"_b, "st3_asisdlsop_bx3_r3b"},
+ {"10110x0"_b, "st1_asisdlsop_bx1_r1b"},
+ {"10110x1"_b, "st3_asisdlsop_bx3_r3b"},
+ {"1011100"_b, "st1_asisdlsop_bx1_r1b"},
+ {"1011101"_b, "st3_asisdlsop_bx3_r3b"},
+ {"1011110"_b, "_rnypvh"},
+ {"1011111"_b, "_nxjgmm"},
+ {"110xxx0"_b, "ld1_asisdlsop_bx1_r1b"},
+ {"110xxx1"_b, "ld3_asisdlsop_bx3_r3b"},
+ {"1110xx0"_b, "ld1_asisdlsop_bx1_r1b"},
+ {"1110xx1"_b, "ld3_asisdlsop_bx3_r3b"},
+ {"11110x0"_b, "ld1_asisdlsop_bx1_r1b"},
+ {"11110x1"_b, "ld3_asisdlsop_bx3_r3b"},
+ {"1111100"_b, "ld1_asisdlsop_bx1_r1b"},
+ {"1111101"_b, "ld3_asisdlsop_bx3_r3b"},
+ {"1111110"_b, "_qqtpln"},
+ {"1111111"_b, "_glhxyj"},
+ },
+ },
+
+ { "_zlmgyp",
+ {23, 22, 13},
+ { {"000"_b, "fmla_asimdelem_rh_h"},
+ {"1x0"_b, "fmla_asimdelem_r_sd"},
+ {"xx1"_b, "sqdmlal_asimdelem_l"},
+ },
+ },
+
+ { "_zmkqxl",
+ {23, 10},
+ { {"00"_b, "adclb_z_zzz"},
+ {"01"_b, "adclt_z_zzz"},
+ {"10"_b, "sbclb_z_zzz"},
+ {"11"_b, "sbclt_z_zzz"},
+ },
+ },
+
+ { "_zmpzkg",
+ {23, 22, 20, 19, 13, 11},
+ { {"0000x0"_b, "orr_asimdimm_l_sl"},
+ {"00x100"_b, "shl_asimdshf_r"},
+ {"00x110"_b, "sqshl_asimdshf_r"},
+ {"010x00"_b, "shl_asimdshf_r"},
+ {"010x10"_b, "sqshl_asimdshf_r"},
+ {"011100"_b, "shl_asimdshf_r"},
+ {"011110"_b, "sqshl_asimdshf_r"},
+ {"0x1000"_b, "shl_asimdshf_r"},
+ {"0x1010"_b, "sqshl_asimdshf_r"},
+ },
+ },
+
+ { "_zmtkvx",
+ {13, 10},
+ { {"00"_b, "_rhpmjz"},
+ },
+ },
+
+ { "_zmzxjm",
+ {17},
+ { {"0"_b, "faddv_v_p_z"},
+ },
+ },
+
+ { "_znmhps",
+ {18, 17},
+ { {"00"_b, "st3_asisdlse_r3"},
+ },
+ },
+
+ { "_zpmkvt",
+ {12},
+ { {"1"_b, "_vqqrjl"},
+ },
+ },
+
+ { "_zpnsrv",
+ {23, 22, 13},
+ { {"000"_b, "fmul_asimdelem_rh_h"},
+ {"1x0"_b, "fmul_asimdelem_r_sd"},
+ {"xx1"_b, "sqdmull_asimdelem_l"},
+ },
+ },
+
+ { "_zppjvk",
+ {12},
+ { {"0"_b, "ld2_asisdlsop_dx2_r2d"},
+ },
+ },
+
+ { "_zpsymj",
+ {22, 13, 12},
+ { {"000"_b, "swp_64_memop"},
+ {"001"_b, "_yjztsq"},
+ {"010"_b, "st64bv0_64_memop"},
+ {"011"_b, "st64bv_64_memop"},
+ {"100"_b, "swpl_64_memop"},
+ },
+ },
+
+ { "_zpzghs",
+ {30, 23, 22},
+ { {"000"_b, "stnp_q_ldstnapair_offs"},
+ {"001"_b, "ldnp_q_ldstnapair_offs"},
+ {"010"_b, "stp_q_ldstpair_post"},
+ {"011"_b, "ldp_q_ldstpair_post"},
+ },
+ },
+
+ { "_zqltpy",
+ {9, 8, 7, 6, 5},
+ { {"00000"_b, "fmov_s_floatimm"},
+ },
+ },
+
+ { "_zqmmsk",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"0000000"_b, "ldaddb_32_memop"},
+ {"0000100"_b, "ldclrb_32_memop"},
+ {"0001000"_b, "ldeorb_32_memop"},
+ {"0001100"_b, "ldsetb_32_memop"},
+ {"000xx10"_b, "strb_32b_ldst_regoff"},
+ {"0010000"_b, "ldaddlb_32_memop"},
+ {"0010100"_b, "ldclrlb_32_memop"},
+ {"0011000"_b, "ldeorlb_32_memop"},
+ {"0011100"_b, "ldsetlb_32_memop"},
+ {"001xx10"_b, "ldrb_32b_ldst_regoff"},
+ {"0100000"_b, "ldaddab_32_memop"},
+ {"0100100"_b, "ldclrab_32_memop"},
+ {"0101000"_b, "ldeorab_32_memop"},
+ {"0101100"_b, "ldsetab_32_memop"},
+ {"010xx10"_b, "ldrsb_64b_ldst_regoff"},
+ {"0110000"_b, "ldaddalb_32_memop"},
+ {"0110100"_b, "ldclralb_32_memop"},
+ {"0111000"_b, "ldeoralb_32_memop"},
+ {"0111100"_b, "ldsetalb_32_memop"},
+ {"011xx10"_b, "ldrsb_32b_ldst_regoff"},
+ {"1000000"_b, "ldaddh_32_memop"},
+ {"1000100"_b, "ldclrh_32_memop"},
+ {"1001000"_b, "ldeorh_32_memop"},
+ {"1001100"_b, "ldseth_32_memop"},
+ {"100xx10"_b, "strh_32_ldst_regoff"},
+ {"1010000"_b, "ldaddlh_32_memop"},
+ {"1010100"_b, "ldclrlh_32_memop"},
+ {"1011000"_b, "ldeorlh_32_memop"},
+ {"1011100"_b, "ldsetlh_32_memop"},
+ {"101xx10"_b, "ldrh_32_ldst_regoff"},
+ {"1100000"_b, "ldaddah_32_memop"},
+ {"1100100"_b, "ldclrah_32_memop"},
+ {"1101000"_b, "ldeorah_32_memop"},
+ {"1101100"_b, "ldsetah_32_memop"},
+ {"110xx10"_b, "ldrsh_64_ldst_regoff"},
+ {"1110000"_b, "ldaddalh_32_memop"},
+ {"1110100"_b, "ldclralh_32_memop"},
+ {"1111000"_b, "ldeoralh_32_memop"},
+ {"1111100"_b, "ldsetalh_32_memop"},
+ {"111xx10"_b, "ldrsh_32_ldst_regoff"},
+ },
+ },
+
+ { "_zqmrhp",
+ {23, 22, 4, 3, 2, 1, 0},
+ { {"0000000"_b, "wrffr_f_p"},
+ },
+ },
+
+ { "_zrmgjx",
+ {30, 23, 22, 13, 4},
+ { {"01000"_b, "ldr_p_bi"},
+ {"01100"_b, "prfb_i_p_bi_s"},
+ {"01110"_b, "prfh_i_p_bi_s"},
+ {"10x0x"_b, "ld1sw_z_p_bz_d_x32_unscaled"},
+ {"10x1x"_b, "ldff1sw_z_p_bz_d_x32_unscaled"},
},
},
-};
-// clang-format on
-static const VisitorNode kVisitorNodes[] = {
-#define VISITOR_NODES(A) {"Visit" #A, &Decoder::Visit##A},
- VISITOR_LIST(VISITOR_NODES)
-#undef VISITOR_NODES
+ { "_zrvlnx",
+ {13, 12},
+ { {"00"_b, "sbc_32_addsub_carry"},
+ },
+ },
+
+ { "_zryvjk",
+ {20, 9, 4},
+ { {"000"_b, "trn2_p_pp"},
+ },
+ },
+
+ { "_zslsvj",
+ {23, 22, 20, 19, 11},
+ { {"00011"_b, "fcvtzu_asisdshf_c"},
+ {"001x1"_b, "fcvtzu_asisdshf_c"},
+ {"01xx1"_b, "fcvtzu_asisdshf_c"},
+ },
+ },
+
+ { "_zsltyl",
+ {22, 20, 11},
+ { {"000"_b, "uqincw_r_rs_uw"},
+ {"001"_b, "uqdecw_r_rs_uw"},
+ {"010"_b, "uqincw_r_rs_x"},
+ {"011"_b, "uqdecw_r_rs_x"},
+ {"100"_b, "uqincd_r_rs_uw"},
+ {"101"_b, "uqdecd_r_rs_uw"},
+ {"110"_b, "uqincd_r_rs_x"},
+ {"111"_b, "uqdecd_r_rs_x"},
+ },
+ },
+
+ { "_zssjpv",
+ {18, 17},
+ { {"00"_b, "st1_asisdlse_r3_3v"},
+ },
+ },
+
+ { "_zsyggq",
+ {23, 10},
+ { {"00"_b, "_txhzxq"},
+ },
+ },
+
+ { "_ztpryr",
+ {13},
+ { {"0"_b, "fmad_z_p_zzz"},
+ {"1"_b, "fmsb_z_p_zzz"},
+ },
+ },
+
+ { "_ztyqrj",
+ {30, 23, 13, 12, 10},
+ { {"00000"_b, "_jmvgsp"},
+ {"00001"_b, "_jkkqvy"},
+ {"00100"_b, "_nkxhsy"},
+ {"00101"_b, "_gshrzq"},
+ {"00110"_b, "_zvjrlz"},
+ {"00111"_b, "_ntjpsx"},
+ {"01000"_b, "_mqrzzk"},
+ {"01001"_b, "_jqxqql"},
+ {"01100"_b, "_xznsqh"},
+ {"01101"_b, "_qvlnll"},
+ {"01110"_b, "_kvnqhn"},
+ {"01111"_b, "_zsltyl"},
+ {"10110"_b, "_zkhjsp"},
+ {"10111"_b, "_hvyjnk"},
+ {"11000"_b, "_sjvhlq"},
+ {"11001"_b, "_xhktsk"},
+ {"11010"_b, "_rtpztp"},
+ {"11011"_b, "_rznrqt"},
+ {"11100"_b, "_kyspnn"},
+ {"11101"_b, "_qljhnp"},
+ {"11110"_b, "_pxyrpm"},
+ {"11111"_b, "_khjvqq"},
+ },
+ },
+
+ { "_zvjrlz",
+ {22, 20, 11},
+ { {"000"_b, "sqincb_r_rs_sx"},
+ {"001"_b, "sqdecb_r_rs_sx"},
+ {"010"_b, "sqincb_r_rs_x"},
+ {"011"_b, "sqdecb_r_rs_x"},
+ {"100"_b, "sqinch_r_rs_sx"},
+ {"101"_b, "sqdech_r_rs_sx"},
+ {"110"_b, "sqinch_r_rs_x"},
+ {"111"_b, "sqdech_r_rs_x"},
+ },
+ },
+
+ { "_zvlxrl",
+ {23, 13, 12},
+ { {"010"_b, "fcmeq_asisdsame_only"},
+ },
+ },
+
+ { "_zvqghy",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1000000"_b, "sha256h_qqv_cryptosha3"},
+ {"1000100"_b, "sha256h2_qqv_cryptosha3"},
+ {"1001000"_b, "sha256su1_vvv_cryptosha3"},
+ },
+ },
+
+ { "_zxhhny",
+ {23, 22},
+ { {"00"_b, "fmsub_s_floatdp3"},
+ {"01"_b, "fmsub_d_floatdp3"},
+ {"11"_b, "fmsub_h_floatdp3"},
+ },
+ },
+
+ { "_zxspnk",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "sturb_32_ldst_unscaled"},
+ {"00001"_b, "strb_32_ldst_immpost"},
+ {"00010"_b, "sttrb_32_ldst_unpriv"},
+ {"00011"_b, "strb_32_ldst_immpre"},
+ {"00100"_b, "ldurb_32_ldst_unscaled"},
+ {"00101"_b, "ldrb_32_ldst_immpost"},
+ {"00110"_b, "ldtrb_32_ldst_unpriv"},
+ {"00111"_b, "ldrb_32_ldst_immpre"},
+ {"01000"_b, "ldursb_64_ldst_unscaled"},
+ {"01001"_b, "ldrsb_64_ldst_immpost"},
+ {"01010"_b, "ldtrsb_64_ldst_unpriv"},
+ {"01011"_b, "ldrsb_64_ldst_immpre"},
+ {"01100"_b, "ldursb_32_ldst_unscaled"},
+ {"01101"_b, "ldrsb_32_ldst_immpost"},
+ {"01110"_b, "ldtrsb_32_ldst_unpriv"},
+ {"01111"_b, "ldrsb_32_ldst_immpre"},
+ {"10000"_b, "sturh_32_ldst_unscaled"},
+ {"10001"_b, "strh_32_ldst_immpost"},
+ {"10010"_b, "sttrh_32_ldst_unpriv"},
+ {"10011"_b, "strh_32_ldst_immpre"},
+ {"10100"_b, "ldurh_32_ldst_unscaled"},
+ {"10101"_b, "ldrh_32_ldst_immpost"},
+ {"10110"_b, "ldtrh_32_ldst_unpriv"},
+ {"10111"_b, "ldrh_32_ldst_immpre"},
+ {"11000"_b, "ldursh_64_ldst_unscaled"},
+ {"11001"_b, "ldrsh_64_ldst_immpost"},
+ {"11010"_b, "ldtrsh_64_ldst_unpriv"},
+ {"11011"_b, "ldrsh_64_ldst_immpre"},
+ {"11100"_b, "ldursh_32_ldst_unscaled"},
+ {"11101"_b, "ldrsh_32_ldst_immpost"},
+ {"11110"_b, "ldtrsh_32_ldst_unpriv"},
+ {"11111"_b, "ldrsh_32_ldst_immpre"},
+ },
+ },
+
+ { "_zxtzmv",
+ {30, 23, 22, 13},
+ { {"0010"_b, "ld1rsh_z_p_bi_s64"},
+ {"0011"_b, "ld1rsh_z_p_bi_s32"},
+ {"0110"_b, "ld1rsb_z_p_bi_s64"},
+ {"0111"_b, "ld1rsb_z_p_bi_s32"},
+ {"1000"_b, "ld1sw_z_p_ai_d"},
+ {"1001"_b, "ldff1sw_z_p_ai_d"},
+ {"1010"_b, "ld1sw_z_p_bz_d_64_scaled"},
+ {"1011"_b, "ldff1sw_z_p_bz_d_64_scaled"},
+ },
+ },
+
+ { "_zyjjgs",
+ {23, 22, 20, 19, 18},
+ { {"00000"_b, "orr_z_zi"},
+ {"01000"_b, "eor_z_zi"},
+ {"10000"_b, "and_z_zi"},
+ {"11000"_b, "dupm_z_i"},
+ {"xx1xx"_b, "cpy_z_o_i"},
+ },
+ },
+
+ { "_zylnnn",
+ {30},
+ { {"0"_b, "cbz_64_compbranch"},
+ },
+ },
+
+ { "_zytrsq",
+ {30},
+ { {"0"_b, "tbz_only_testbranch"},
+ },
+ },
+
+ { "_zyzzhm",
+ {23, 20, 19, 18, 17, 16},
+ { {"000001"_b, "frint32x_asimdmisc_r"},
+ },
+ },
+
+ { "_zzgrjz",
+ {18, 17},
+ { {"0x"_b, "ld3_asisdlsep_r3_r"},
+ {"10"_b, "ld3_asisdlsep_r3_r"},
+ {"11"_b, "ld3_asisdlsep_i3_i"},
+ },
+ },
+
+ { "_zzhgng",
+ {30, 23, 22, 13, 12, 11, 10},
+ { {"1000000"_b, "sha1c_qsv_cryptosha3"},
+ {"1000001"_b, "dup_asisdone_only"},
+ {"1000100"_b, "sha1p_qsv_cryptosha3"},
+ {"1001000"_b, "sha1m_qsv_cryptosha3"},
+ {"1001100"_b, "sha1su0_vvv_cryptosha3"},
+ {"1010111"_b, "fmulx_asisdsamefp16_only"},
+ {"1011001"_b, "fcmeq_asisdsamefp16_only"},
+ {"1011111"_b, "frecps_asisdsamefp16_only"},
+ {"1111111"_b, "frsqrts_asisdsamefp16_only"},
+ },
+ },
+
+ { "_zzrqlh",
+ {30, 23, 22, 11, 10},
+ { {"00000"_b, "_ygpjrl"},
+ {"01000"_b, "csel_32_condsel"},
+ {"01001"_b, "csinc_32_condsel"},
+ {"01100"_b, "_hggmnk"},
+ {"01101"_b, "_sllkpt"},
+ {"01110"_b, "_mgsvlj"},
+ {"01111"_b, "_kyyzks"},
+ {"10000"_b, "_zrvlnx"},
+ {"11000"_b, "csinv_32_condsel"},
+ {"11001"_b, "csneg_32_condsel"},
+ {"11100"_b, "_ghmzhr"},
+ {"11101"_b, "_gnqjhz"},
+ {"11110"_b, "_mmmjkx"},
+ },
+ },
+
+ { "_zzvxvh",
+ {23, 22, 11, 10},
+ { {"0001"_b, "pmul_z_zz"},
+ {"xx00"_b, "mul_z_zz"},
+ {"xx10"_b, "smulh_z_zz"},
+ {"xx11"_b, "umulh_z_zz"},
+ },
+ },
+
+ { "Root",
+ {31, 29, 28, 27, 26, 25, 24, 21, 15, 14},
+ { {"00000000xx"_b, "_qzjnpr"},
+ {"0000100000"_b, "_rzzxsn"},
+ {"0000100001"_b, "_xvppmm"},
+ {"0000100010"_b, "_ptsjnr"},
+ {"0000100011"_b, "_nlpmvl"},
+ {"0000100100"_b, "_ljljkv"},
+ {"0000100101"_b, "_kktglv"},
+ {"0000100110"_b, "_ppnssm"},
+ {"0000100111"_b, "_ztyqrj"},
+ {"0000101000"_b, "_rnqtmt"},
+ {"0000101001"_b, "_njgxlz"},
+ {"0000101010"_b, "_mpvsng"},
+ {"0000101011"_b, "_qlxksl"},
+ {"0000101100"_b, "_mhrjvp"},
+ {"0000101101"_b, "_pgjjsz"},
+ {"0000101110"_b, "_yppyky"},
+ {"0000101111"_b, "_yjmngt"},
+ {"000100000x"_b, "_vmjgmg"},
+ {"000100001x"_b, "_ytvxsl"},
+ {"0001000101"_b, "_yvhnlk"},
+ {"0001000111"_b, "_xryzqs"},
+ {"000101000x"_b, "_vjqsqs"},
+ {"000101010x"_b, "_phvnqh"},
+ {"000101100x"_b, "_pphhym"},
+ {"00010111xx"_b, "_qsygjs"},
+ {"0001100000"_b, "_jxrlyh"},
+ {"0001100001"_b, "_yqsgrt"},
+ {"0001100010"_b, "_kpyqyv"},
+ {"0001101000"_b, "_zkttzl"},
+ {"0001101001"_b, "_llqjlh"},
+ {"0001101010"_b, "_xhvtjg"},
+ {"0001101011"_b, "_xylmmp"},
+ {"0001101100"_b, "_vzzvlr"},
+ {"0001101101"_b, "_sjlrxn"},
+ {"0001101110"_b, "_xrhhjz"},
+ {"0001101111"_b, "_ygnypk"},
+ {"0001110000"_b, "_xjghst"},
+ {"0001110001"_b, "_xxyklv"},
+ {"0001110010"_b, "_rtgkkg"},
+ {"0001110100"_b, "_hqnxvt"},
+ {"0001110101"_b, "_hmxlny"},
+ {"0001110110"_b, "_txsmts"},
+ {"0001110111"_b, "_mtnpmr"},
+ {"0001111000"_b, "_ttstyt"},
+ {"0001111001"_b, "_krhrrr"},
+ {"0001111010"_b, "_xhltxn"},
+ {"0001111011"_b, "_ymznlj"},
+ {"0001111100"_b, "_kkgzst"},
+ {"0001111101"_b, "_gvjgyp"},
+ {"0001111110"_b, "_mjqvxq"},
+ {"0001111111"_b, "_spjjkg"},
+ {"0010001xxx"_b, "_vppthj"},
+ {"0010010xxx"_b, "_qzzlhq"},
+ {"001001100x"_b, "_zjslnr"},
+ {"001001110x"_b, "_jpxgqh"},
+ {"0010011x1x"_b, "_gkhhjm"},
+ {"0010100xxx"_b, "_jyxszq"},
+ {"0010110xxx"_b, "_xqhgkk"},
+ {"00101x1xxx"_b, "_zkqtrj"},
+ {"0011000xxx"_b, "_qkyjhg"},
+ {"00110010xx"_b, "_yjxshz"},
+ {"0011010000"_b, "_zzrqlh"},
+ {"0011010001"_b, "_qsrlql"},
+ {"001101001x"_b, "_tnrrjk"},
+ {"001101100x"_b, "_pnxgrg"},
+ {"001101101x"_b, "_ytsghm"},
+ {"0011100xxx"_b, "_srmhjk"},
+ {"0011110000"_b, "_zzhgng"},
+ {"0011110001"_b, "_zvqghy"},
+ {"001111001x"_b, "_hnzzkj"},
+ {"0011110100"_b, "_qntssm"},
+ {"0011110101"_b, "_mrqqlp"},
+ {"0011110110"_b, "_nxyhyv"},
+ {"0011110111"_b, "_qtknlp"},
+ {"0011111000"_b, "_gszlvl"},
+ {"0011111001"_b, "_mlnqrm"},
+ {"0011111010"_b, "_yvygml"},
+ {"0011111011"_b, "_xhxrnt"},
+ {"0011111100"_b, "_grqnlm"},
+ {"0011111101"_b, "_ktnjrx"},
+ {"0011111110"_b, "_gkpzhr"},
+ {"0011111111"_b, "_mpyhkm"},
+ {"0100100000"_b, "_yyyshx"},
+ {"0100100001"_b, "_mylphg"},
+ {"0100100010"_b, "_nsjhhg"},
+ {"0100100011"_b, "_rhhrhg"},
+ {"0100100100"_b, "_ymhgxg"},
+ {"0100100101"_b, "_nvkthr"},
+ {"0100100110"_b, "_phthqj"},
+ {"0100100111"_b, "_kyjxrr"},
+ {"0100101000"_b, "_gtvhmp"},
+ {"0100101001"_b, "_pppsmg"},
+ {"0100101010"_b, "_zgysvr"},
+ {"0100101011"_b, "_shqygv"},
+ {"0100101100"_b, "_lpsvyy"},
+ {"0100101101"_b, "_nqkhrv"},
+ {"0100101110"_b, "_tkjtgp"},
+ {"0100101111"_b, "_htqpks"},
+ {"0101000xxx"_b, "_vpkptr"},
+ {"0101001xxx"_b, "_vmjzyk"},
+ {"010101000x"_b, "_gmrxlp"},
+ {"010101010x"_b, "_jmgkrl"},
+ {"010101100x"_b, "_qhgtvk"},
+ {"01010111xx"_b, "_rxpspy"},
+ {"0101100xxx"_b, "_qhtqrj"},
+ {"0101101xxx"_b, "_vnpqrh"},
+ {"0101110000"_b, "_vpykkg"},
+ {"0101110001"_b, "_xrxvpr"},
+ {"0101110010"_b, "_zglksl"},
+ {"0101110011"_b, "_gtjskz"},
+ {"0101110100"_b, "_qntygx"},
+ {"0101110101"_b, "_kxprqm"},
+ {"0101110110"_b, "_qxtvzy"},
+ {"0101110111"_b, "_mstthg"},
+ {"0101111000"_b, "_qmqmpj"},
+ {"0101111001"_b, "_rhttgj"},
+ {"0101111010"_b, "_jqnhrj"},
+ {"0101111011"_b, "_nlqglq"},
+ {"0101111100"_b, "_vtxyxz"},
+ {"0101111101"_b, "_pqtjgx"},
+ {"0101111110"_b, "_snjpvy"},
+ {"0101111111"_b, "_spzgkt"},
+ {"0110001xxx"_b, "_plktrh"},
+ {"0110010xxx"_b, "_xtqmyj"},
+ {"0110011xxx"_b, "_lzpykk"},
+ {"0110100xxx"_b, "_mtzgpn"},
+ {"0110101xxx"_b, "_tvgvvq"},
+ {"01110000xx"_b, "_zxspnk"},
+ {"0111000100"_b, "_zqmmsk"},
+ {"0111000101"_b, "_nmzyvt"},
+ {"0111000110"_b, "_vvhzhv"},
+ {"0111000111"_b, "_sltqpy"},
+ {"0111001xxx"_b, "_qzsthq"},
+ {"0111010000"_b, "_zsyggq"},
+ {"0111010001"_b, "_hngpgx"},
+ {"011101001x"_b, "_njxtpv"},
+ {"01111000xx"_b, "_kpmvkn"},
+ {"0111100101"_b, "_jhytlg"},
+ {"0111100111"_b, "_rksxpn"},
+ {"01111001x0"_b, "_trlhgn"},
+ {"0111101xxx"_b, "_jxtgtx"},
+ {"0111110000"_b, "_tnhmpx"},
+ {"0111110010"_b, "_sqjpsl"},
+ {"0111110100"_b, "_sjnxky"},
+ {"0111110101"_b, "_kykymg"},
+ {"0111110110"_b, "_pxzkjy"},
+ {"0111110111"_b, "_tjktkm"},
+ {"0111111000"_b, "_hhkhkk"},
+ {"0111111001"_b, "_nxmjvy"},
+ {"0111111010"_b, "_vkvgnm"},
+ {"0111111011"_b, "_tssqsr"},
+ {"0111111100"_b, "_mthzvm"},
+ {"0111111101"_b, "_nlgqsk"},
+ {"0111111110"_b, "_gvykrp"},
+ {"0111111111"_b, "_sjzsvv"},
+ {"0x10000xxx"_b, "adr_only_pcreladdr"},
+ {"1000100000"_b, "_lspzrv"},
+ {"1000100001"_b, "_kxvvkq"},
+ {"1000100010"_b, "_sxpvym"},
+ {"1000100011"_b, "_vkrkks"},
+ {"1000100100"_b, "_xvnyxq"},
+ {"1000100101"_b, "_gtxpgx"},
+ {"1000100110"_b, "_vlrhpy"},
+ {"1000100111"_b, "_ymhkrx"},
+ {"1000101000"_b, "_zrmgjx"},
+ {"1000101001"_b, "_qqyryl"},
+ {"1000101010"_b, "_hgxtqy"},
+ {"1000101011"_b, "_yytvxh"},
+ {"1000101100"_b, "_ptslzg"},
+ {"1000101101"_b, "_ytkjxx"},
+ {"1000101110"_b, "_zxtzmv"},
+ {"1000101111"_b, "_kgmqkh"},
+ {"100100000x"_b, "_jhqlkv"},
+ {"100100001x"_b, "_lxgltj"},
+ {"1001000100"_b, "_hxzlmm"},
+ {"1001000101"_b, "_vllqmp"},
+ {"1001000110"_b, "_tlstgz"},
+ {"1001000111"_b, "_mrmpgh"},
+ {"10010100xx"_b, "_rzkmny"},
+ {"10010101xx"_b, "_jggvph"},
+ {"10010110xx"_b, "_nhkstj"},
+ {"10010111xx"_b, "_jsygzs"},
+ {"100111000x"_b, "_gmsgqz"},
+ {"1001110010"_b, "_grrjlh"},
+ {"1001110011"_b, "_jhkglp"},
+ {"100111010x"_b, "_qytrjj"},
+ {"1001110110"_b, "_qsqqxg"},
+ {"1001110111"_b, "_kypqpy"},
+ {"1010001xxx"_b, "_vsvtqz"},
+ {"1010010xxx"_b, "_vqzlzt"},
+ {"10100110xx"_b, "_xxpqgg"},
+ {"10100111xx"_b, "_rgjqzs"},
+ {"10101000xx"_b, "_qmrgkn"},
+ {"10101001xx"_b, "_jkxlnq"},
+ {"1010101000"_b, "_ggvztl"},
+ {"1010101001"_b, "_xlhjhx"},
+ {"101010101x"_b, "_nqgqjh"},
+ {"1010101100"_b, "_qsrtzz"},
+ {"1010101110"_b, "_tzzzxz"},
+ {"10101011x1"_b, "_lhmlrj"},
+ {"1010110000"_b, "_kkmxxx"},
+ {"1010110100"_b, "_ltvrrg"},
+ {"1010111000"_b, "_mqkjxj"},
+ {"1010111100"_b, "_pmrngh"},
+ {"101011xx10"_b, "_hsjynv"},
+ {"101011xxx1"_b, "_kmhtqp"},
+ {"1011000xxx"_b, "_ylhxlt"},
+ {"10110010xx"_b, "_gkxgsn"},
+ {"1011001100"_b, "_xzmjxk"},
+ {"1011001110"_b, "_ppqkym"},
+ {"10110011x1"_b, "_xzyxnr"},
+ {"1011010000"_b, "_xyljvp"},
+ {"1011010001"_b, "_sxnkrh"},
+ {"101101001x"_b, "_klthpn"},
+ {"101101100x"_b, "_xnsrny"},
+ {"101101101x"_b, "_htppjj"},
+ {"101101110x"_b, "_rmmmjj"},
+ {"101101111x"_b, "_txnqzy"},
+ {"1011100xxx"_b, "_gmvtss"},
+ {"10111100xx"_b, "_gnxgxs"},
+ {"1011110100"_b, "_zjgvyp"},
+ {"1100100000"_b, "_sjtrhm"},
+ {"1100100001"_b, "_hzkglv"},
+ {"1100100010"_b, "_qrygny"},
+ {"1100100011"_b, "_tjzqnp"},
+ {"1100100100"_b, "_yqvqtx"},
+ {"1100100101"_b, "_ngttyj"},
+ {"1100100110"_b, "_kqzmtr"},
+ {"1100100111"_b, "_qpvgnh"},
+ {"1100101000"_b, "_tpkslq"},
+ {"1100101001"_b, "_ympyng"},
+ {"1100101010"_b, "_ytvtqn"},
+ {"1100101011"_b, "_qvsypn"},
+ {"1100101100"_b, "_lqmksm"},
+ {"1100101101"_b, "_mkskxj"},
+ {"1100101110"_b, "_knkjnz"},
+ {"1100101111"_b, "_hxnmsl"},
+ {"1101000xxx"_b, "_shrsxr"},
+ {"1101001xxx"_b, "_xhkgqh"},
+ {"11010100xx"_b, "_rmxjsn"},
+ {"11010101xx"_b, "_mvzvpk"},
+ {"11010110xx"_b, "_ysjqhn"},
+ {"11010111xx"_b, "_lpkqzl"},
+ {"1101100xxx"_b, "_zpzghs"},
+ {"1101101xxx"_b, "_gmrxqq"},
+ {"1110001xxx"_b, "_jlqjzr"},
+ {"1110010xxx"_b, "_qgmngg"},
+ {"1110011xxx"_b, "_vlrrtz"},
+ {"1110100xxx"_b, "_zylnnn"},
+ {"1110101xxx"_b, "_yjjrgg"},
+ {"11110000xx"_b, "_qhtrnn"},
+ {"1111000100"_b, "_lrqkvp"},
+ {"1111000101"_b, "_pvkmmv"},
+ {"1111000110"_b, "_lxmyjh"},
+ {"1111000111"_b, "_vgrhsz"},
+ {"1111001xxx"_b, "_vqvqhp"},
+ {"1111010000"_b, "_yjsjvt"},
+ {"1111010010"_b, "_yzzlxs"},
+ {"11110100x1"_b, "_vkhhkk"},
+ {"11111000xx"_b, "_xrhmtg"},
+ {"11111001xx"_b, "_xprlgy"},
+ {"1111101xxx"_b, "_hjgylh"},
+ {"1x10000xxx"_b, "adrp_only_pcreladdr"},
+ {"x110110xxx"_b, "_zytrsq"},
+ {"x110111xxx"_b, "_kxsysq"},
+ },
+ },
};
+// clang-format on
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/decoder-visitor-map-aarch64.h b/src/aarch64/decoder-visitor-map-aarch64.h
new file mode 100644
index 00000000..49c27b2b
--- /dev/null
+++ b/src/aarch64/decoder-visitor-map-aarch64.h
@@ -0,0 +1,2973 @@
+// Copyright 2020, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Initialisation data for a std::map, from instruction form to the visitor
+// function that handles it. This allows reuse of existing visitor functions
+// that support groups of instructions, though they may do extra decoding
+// no longer needed.
+// In the long term, it's expected that each component that uses the decoder
+// will want to group instruction handling in the way most appropriate to
+// the component's function, so this map initialisation will no longer be
+// shared.
+
+#define DEFAULT_FORM_TO_VISITOR_MAP(VISITORCLASS) \
+ {"abs_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"addpl_r_ri"_h, &VISITORCLASS::VisitSVEStackFrameAdjustment}, \
+ {"addvl_r_ri"_h, &VISITORCLASS::VisitSVEStackFrameAdjustment}, \
+ {"add_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \
+ {"add_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \
+ {"add_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \
+ {"adr_z_az_d_s32_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \
+ {"adr_z_az_d_u32_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \
+ {"adr_z_az_sd_same_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \
+ {"ands_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"andv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"and_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"and_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \
+ {"and_z_zi"_h, \
+ &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \
+ {"and_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \
+ {"asrd_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \
+ {"asrr_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \
+ {"asr_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \
+ {"asr_z_p_zw"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \
+ {"asr_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \
+ {"asr_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \
+ {"asr_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \
+ {"bics_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"bic_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"bic_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \
+ {"bic_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \
+ {"brkas_p_p_p_z"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \
+ {"brka_p_p_p"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \
+ {"brkbs_p_p_p_z"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \
+ {"brkb_p_p_p"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \
+ {"brkns_p_p_pp"_h, \
+ &VISITORCLASS::VisitSVEPropagateBreakToNextPartition}, \
+ {"brkn_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreakToNextPartition}, \
+ {"brkpas_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \
+ {"brkpa_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \
+ {"brkpbs_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \
+ {"brkpb_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \
+ {"clasta_r_p_z"_h, \
+ &VISITORCLASS::VisitSVEConditionallyExtractElementToGeneralRegister}, \
+ {"clasta_v_p_z"_h, \
+ &VISITORCLASS::VisitSVEConditionallyExtractElementToSIMDFPScalar}, \
+ {"clasta_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEConditionallyBroadcastElementToVector}, \
+ {"clastb_r_p_z"_h, \
+ &VISITORCLASS::VisitSVEConditionallyExtractElementToGeneralRegister}, \
+ {"clastb_v_p_z"_h, \
+ &VISITORCLASS::VisitSVEConditionallyExtractElementToSIMDFPScalar}, \
+ {"clastb_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEConditionallyBroadcastElementToVector}, \
+ {"cls_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"clz_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"cmpeq_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \
+ {"cmpeq_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpeq_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpge_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \
+ {"cmpge_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpge_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpgt_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \
+ {"cmpgt_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpgt_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmphi_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \
+ {"cmphi_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmphi_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmphs_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \
+ {"cmphs_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmphs_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmple_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \
+ {"cmple_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmplo_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \
+ {"cmplo_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpls_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \
+ {"cmpls_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmplt_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \
+ {"cmplt_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpne_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \
+ {"cmpne_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cmpne_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \
+ {"cnot_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"cntb_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \
+ {"cntd_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \
+ {"cnth_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \
+ {"cntp_r_p_p"_h, &VISITORCLASS::VisitSVEPredicateCount}, \
+ {"cntw_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \
+ {"cnt_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"compact_z_p_z"_h, &VISITORCLASS::VisitSVECompressActiveElements}, \
+ {"cpy_z_o_i"_h, &VISITORCLASS::VisitSVECopyIntImm_Predicated}, \
+ {"cpy_z_p_i"_h, &VISITORCLASS::VisitSVECopyIntImm_Predicated}, \
+ {"cpy_z_p_r"_h, \
+ &VISITORCLASS::VisitSVECopyGeneralRegisterToVector_Predicated}, \
+ {"cpy_z_p_v"_h, \
+ &VISITORCLASS::VisitSVECopySIMDFPScalarRegisterToVector_Predicated}, \
+ {"ctermeq_rr"_h, &VISITORCLASS::VisitSVEConditionallyTerminateScalars}, \
+ {"ctermne_rr"_h, &VISITORCLASS::VisitSVEConditionallyTerminateScalars}, \
+ {"decb_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"decd_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"decd_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \
+ {"dech_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"dech_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \
+ {"decp_r_p_r"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"decp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"decw_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"decw_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \
+ {"dupm_z_i"_h, &VISITORCLASS::VisitSVEBroadcastBitmaskImm}, \
+ {"dup_z_i"_h, &VISITORCLASS::VisitSVEBroadcastIntImm_Unpredicated}, \
+ {"dup_z_r"_h, &VISITORCLASS::VisitSVEBroadcastGeneralRegister}, \
+ {"dup_z_zi"_h, &VISITORCLASS::VisitSVEBroadcastIndexElement}, \
+ {"eors_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"eorv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"eor_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"eor_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \
+ {"eor_z_zi"_h, \
+ &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \
+ {"eor_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \
+ {"ext_z_zi_des"_h, &VISITORCLASS::VisitSVEPermuteVectorExtract}, \
+ {"fabd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fabs_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"facge_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \
+ {"facgt_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \
+ {"fadda_v_p_z"_h, &VISITORCLASS::VisitSVEFPAccumulatingReduction}, \
+ {"faddv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \
+ {"fadd_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fadd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fadd_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \
+ {"fcadd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPComplexAddition}, \
+ {"fcmeq_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \
+ {"fcmeq_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \
+ {"fcmge_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \
+ {"fcmge_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \
+ {"fcmgt_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \
+ {"fcmgt_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \
+ {"fcmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPComplexMulAdd}, \
+ {"fcmla_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPComplexMulAddIndex}, \
+ {"fcmla_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPComplexMulAddIndex}, \
+ {"fcmle_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \
+ {"fcmlt_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \
+ {"fcmne_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \
+ {"fcmne_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \
+ {"fcmuo_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \
+ {"fcpy_z_p_i"_h, &VISITORCLASS::VisitSVECopyFPImm_Predicated}, \
+ {"fcvtzs_z_p_z_d2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzs_z_p_z_d2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzs_z_p_z_fp162h"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzs_z_p_z_fp162w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzs_z_p_z_fp162x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzs_z_p_z_s2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzs_z_p_z_s2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzu_z_p_z_d2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzu_z_p_z_d2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzu_z_p_z_fp162h"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzu_z_p_z_fp162w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzu_z_p_z_fp162x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzu_z_p_z_s2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvtzu_z_p_z_s2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \
+ {"fcvt_z_p_z_d2h"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \
+ {"fcvt_z_p_z_d2s"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \
+ {"fcvt_z_p_z_h2d"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \
+ {"fcvt_z_p_z_h2s"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \
+ {"fcvt_z_p_z_s2d"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \
+ {"fcvt_z_p_z_s2h"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \
+ {"fdivr_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fdiv_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fdup_z_i"_h, &VISITORCLASS::VisitSVEBroadcastFPImm_Unpredicated}, \
+ {"fexpa_z_z"_h, &VISITORCLASS::VisitSVEFPExponentialAccelerator}, \
+ {"fmad_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"fmaxnmv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \
+ {"fmaxnm_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fmaxnm_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fmaxv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \
+ {"fmax_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fmax_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fminnmv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \
+ {"fminnm_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fminnm_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fminv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \
+ {"fmin_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fmin_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"fmla_z_zzzi_d"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \
+ {"fmla_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \
+ {"fmla_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \
+ {"fmls_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"fmls_z_zzzi_d"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \
+ {"fmls_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \
+ {"fmls_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \
+ {"fmsb_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"fmulx_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fmul_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fmul_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fmul_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \
+ {"fmul_z_zzi_d"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \
+ {"fmul_z_zzi_h"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \
+ {"fmul_z_zzi_s"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \
+ {"fneg_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"fnmad_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"fnmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"fnmls_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"fnmsb_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \
+ {"frecpe_z_z"_h, &VISITORCLASS::VisitSVEFPUnaryOpUnpredicated}, \
+ {"frecps_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \
+ {"frecpx_z_p_z"_h, &VISITORCLASS::VisitSVEFPUnaryOp}, \
+ {"frinta_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \
+ {"frinti_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \
+ {"frintm_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \
+ {"frintn_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \
+ {"frintp_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \
+ {"frintx_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \
+ {"frintz_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \
+ {"frsqrte_z_z"_h, &VISITORCLASS::VisitSVEFPUnaryOpUnpredicated}, \
+ {"frsqrts_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \
+ {"fscale_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fsqrt_z_p_z"_h, &VISITORCLASS::VisitSVEFPUnaryOp}, \
+ {"fsubr_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fsubr_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fsub_z_p_zs"_h, \
+ &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \
+ {"fsub_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \
+ {"fsub_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \
+ {"ftmad_z_zzi"_h, &VISITORCLASS::VisitSVEFPTrigMulAddCoefficient}, \
+ {"ftsmul_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \
+ {"ftssel_z_zz"_h, &VISITORCLASS::VisitSVEFPTrigSelectCoefficient}, \
+ {"incb_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"incd_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"incd_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \
+ {"inch_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"inch_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \
+ {"incp_r_p_r"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"incp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"incw_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \
+ {"incw_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \
+ {"index_z_ii"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \
+ {"index_z_ir"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \
+ {"index_z_ri"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \
+ {"index_z_rr"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \
+ {"insr_z_r"_h, &VISITORCLASS::VisitSVEInsertGeneralRegister}, \
+ {"insr_z_v"_h, &VISITORCLASS::VisitSVEInsertSIMDFPScalarRegister}, \
+ {"lasta_r_p_z"_h, \
+ &VISITORCLASS::VisitSVEExtractElementToGeneralRegister}, \
+ {"lasta_v_p_z"_h, \
+ &VISITORCLASS::VisitSVEExtractElementToSIMDFPScalarRegister}, \
+ {"lastb_r_p_z"_h, \
+ &VISITORCLASS::VisitSVEExtractElementToGeneralRegister}, \
+ {"lastb_v_p_z"_h, \
+ &VISITORCLASS::VisitSVEExtractElementToSIMDFPScalarRegister}, \
+ {"ld1b_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ld1b_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ld1b_z_p_bi_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1b_z_p_bi_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1b_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1b_z_p_bi_u8"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1b_z_p_br_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1b_z_p_br_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1b_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1b_z_p_br_u8"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1b_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ld1b_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ld1b_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ld1d_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ld1d_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1d_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1d_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ld1d_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ld1d_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ld1d_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ld1h_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ld1h_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ld1h_z_p_bi_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1h_z_p_bi_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1h_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1h_z_p_br_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1h_z_p_br_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1h_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1h_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ld1h_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ld1h_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ld1h_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ld1h_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \
+ {"ld1h_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ld1rb_z_p_bi_u16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rb_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rb_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rb_z_p_bi_u8"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rd_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rh_z_p_bi_u16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rh_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rh_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rqb_z_p_bi_u8"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \
+ {"ld1rqb_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \
+ {"ld1rqd_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \
+ {"ld1rqd_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \
+ {"ld1rqh_z_p_bi_u16"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \
+ {"ld1rqh_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \
+ {"ld1rqw_z_p_bi_u32"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \
+ {"ld1rqw_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \
+ {"ld1rsb_z_p_bi_s16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rsb_z_p_bi_s32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rsb_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rsh_z_p_bi_s32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rsh_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rsw_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rw_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1rw_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \
+ {"ld1sb_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ld1sb_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ld1sb_z_p_bi_s16"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1sb_z_p_bi_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1sb_z_p_bi_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1sb_z_p_br_s16"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1sb_z_p_br_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1sb_z_p_br_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1sb_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ld1sb_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ld1sb_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ld1sh_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ld1sh_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ld1sh_z_p_bi_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1sh_z_p_bi_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1sh_z_p_br_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1sh_z_p_br_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1sh_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ld1sh_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ld1sh_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ld1sh_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ld1sh_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \
+ {"ld1sh_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ld1sw_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ld1sw_z_p_bi_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1sw_z_p_br_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1sw_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ld1sw_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ld1sw_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ld1sw_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ld1w_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ld1w_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ld1w_z_p_bi_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1w_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \
+ {"ld1w_z_p_br_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1w_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \
+ {"ld1w_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ld1w_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ld1w_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ld1w_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ld1w_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets}, \
+ {"ld1w_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ld2b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld2b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld2d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld2d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld2h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld2h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld2w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld2w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld3b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld3b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld3d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld3d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld3h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld3h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld3w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld3w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld4b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld4b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld4d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld4d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld4h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld4h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ld4w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \
+ {"ld4w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \
+ {"ldff1b_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ldff1b_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ldff1b_z_p_br_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1b_z_p_br_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1b_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1b_z_p_br_u8"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1b_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ldff1b_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ldff1b_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ldff1d_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ldff1d_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1d_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ldff1d_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ldff1d_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ldff1d_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ldff1h_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ldff1h_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ldff1h_z_p_br_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1h_z_p_br_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1h_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1h_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ldff1h_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ldff1h_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ldff1h_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ldff1h_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \
+ {"ldff1h_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ldff1sb_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ldff1sb_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ldff1sb_z_p_br_s16"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1sb_z_p_br_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1sb_z_p_br_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1sb_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ldff1sb_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ldff1sb_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ldff1sh_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ldff1sh_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ldff1sh_z_p_br_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1sh_z_p_br_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1sh_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ldff1sh_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ldff1sh_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ldff1sh_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ldff1sh_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \
+ {"ldff1sh_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ldff1sw_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ldff1sw_z_p_br_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1sw_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ldff1sw_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ldff1sw_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ldff1sw_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ldff1w_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \
+ {"ldff1w_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \
+ {"ldff1w_z_p_br_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1w_z_p_br_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \
+ {"ldff1w_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \
+ {"ldff1w_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \
+ {"ldff1w_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \
+ {"ldff1w_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"ldff1w_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets}, \
+ {"ldff1w_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \
+ {"ldnf1b_z_p_bi_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1b_z_p_bi_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1b_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1b_z_p_bi_u8"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1d_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1h_z_p_bi_u16"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1h_z_p_bi_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1h_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1sb_z_p_bi_s16"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1sb_z_p_bi_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1sb_z_p_bi_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1sh_z_p_bi_s32"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1sh_z_p_bi_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1sw_z_p_bi_s64"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1w_z_p_bi_u32"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnf1w_z_p_bi_u64"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \
+ {"ldnt1b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \
+ {"ldnt1b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \
+ {"ldnt1d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \
+ {"ldnt1d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \
+ {"ldnt1h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \
+ {"ldnt1h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \
+ {"ldnt1w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \
+ {"ldnt1w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \
+ {"ldr_p_bi"_h, &VISITORCLASS::VisitSVELoadPredicateRegister}, \
+ {"ldr_z_bi"_h, &VISITORCLASS::VisitSVELoadVectorRegister}, \
+ {"lslr_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \
+ {"lsl_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \
+ {"lsl_z_p_zw"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \
+ {"lsl_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \
+ {"lsl_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \
+ {"lsl_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \
+ {"lsrr_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \
+ {"lsr_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \
+ {"lsr_z_p_zw"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \
+ {"lsr_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \
+ {"lsr_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \
+ {"lsr_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \
+ {"mad_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \
+ {"mla_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \
+ {"mls_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \
+ {"movprfx_z_p_z"_h, &VISITORCLASS::VisitSVEMovprfx}, \
+ {"movprfx_z_z"_h, \
+ &VISITORCLASS::VisitSVEConstructivePrefix_Unpredicated}, \
+ {"msb_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \
+ {"mul_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \
+ {"mul_z_zi"_h, &VISITORCLASS::VisitSVEIntMulImm_Unpredicated}, \
+ {"nands_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"nand_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"neg_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"nors_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"nor_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"not_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"orns_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"orn_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"orrs_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"orr_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"orr_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \
+ {"orr_z_zi"_h, \
+ &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \
+ {"orr_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \
+ {"orv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"pfalse_p"_h, &VISITORCLASS::VisitSVEPredicateZero}, \
+ {"pfirst_p_p_p"_h, &VISITORCLASS::VisitSVEPredicateFirstActive}, \
+ {"pnext_p_p_p"_h, &VISITORCLASS::VisitSVEPredicateNextActive}, \
+ {"prfb_i_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \
+ {"prfb_i_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \
+ {"prfb_i_p_bi_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \
+ {"prfb_i_p_br_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \
+ {"prfb_i_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \
+ {"prfb_i_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \
+ {"prfb_i_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \
+ {"prfd_i_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \
+ {"prfd_i_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \
+ {"prfd_i_p_bi_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \
+ {"prfd_i_p_br_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \
+ {"prfd_i_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \
+ {"prfd_i_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \
+ {"prfd_i_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \
+ {"prfh_i_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \
+ {"prfh_i_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \
+ {"prfh_i_p_bi_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \
+ {"prfh_i_p_br_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \
+ {"prfh_i_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \
+ {"prfh_i_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \
+ {"prfh_i_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \
+ {"prfw_i_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \
+ {"prfw_i_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \
+ {"prfw_i_p_bi_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \
+ {"prfw_i_p_br_s"_h, \
+ &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \
+ {"prfw_i_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \
+ {"prfw_i_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \
+ {"prfw_i_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \
+ {"ptest_p_p"_h, &VISITORCLASS::VisitSVEPredicateTest}, \
+ {"ptrues_p_s"_h, &VISITORCLASS::VisitSVEPredicateInitialize}, \
+ {"ptrue_p_s"_h, &VISITORCLASS::VisitSVEPredicateInitialize}, \
+ {"punpkhi_p_p"_h, &VISITORCLASS::VisitSVEUnpackPredicateElements}, \
+ {"punpklo_p_p"_h, &VISITORCLASS::VisitSVEUnpackPredicateElements}, \
+ {"rbit_z_p_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \
+ {"rdffrs_p_p_f"_h, \
+ &VISITORCLASS::VisitSVEPredicateReadFromFFR_Predicated}, \
+ {"rdffr_p_f"_h, \
+ &VISITORCLASS::VisitSVEPredicateReadFromFFR_Unpredicated}, \
+ {"rdffr_p_p_f"_h, \
+ &VISITORCLASS::VisitSVEPredicateReadFromFFR_Predicated}, \
+ {"rdvl_r_i"_h, &VISITORCLASS::VisitSVEStackFrameSize}, \
+ {"revb_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \
+ {"revh_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \
+ {"revw_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \
+ {"rev_p_p"_h, &VISITORCLASS::VisitSVEReversePredicateElements}, \
+ {"rev_z_z"_h, &VISITORCLASS::VisitSVEReverseVectorElements}, \
+ {"sabd_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \
+ {"saddv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"scvtf_z_p_z_h2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"scvtf_z_p_z_w2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"scvtf_z_p_z_w2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"scvtf_z_p_z_w2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"scvtf_z_p_z_x2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"scvtf_z_p_z_x2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"scvtf_z_p_z_x2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"sdivr_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \
+ {"sdiv_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \
+ {"sdot_z_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddUnpredicated}, \
+ {"sdot_z_zzzi_d"_h, &VISITORCLASS::VisitSVEMulIndex}, \
+ {"sdot_z_zzzi_s"_h, &VISITORCLASS::VisitSVEMulIndex}, \
+ {"sel_p_p_pp"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \
+ {"sel_z_p_zz"_h, &VISITORCLASS::VisitSVEVectorSelect}, \
+ {"setffr_f"_h, &VISITORCLASS::VisitSVEFFRInitialise}, \
+ {"smaxv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"smax_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \
+ {"smax_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \
+ {"sminv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"smin_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \
+ {"smin_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \
+ {"smulh_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \
+ {"splice_z_p_zz_des"_h, &VISITORCLASS::VisitSVEVectorSplice}, \
+ {"sqadd_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \
+ {"sqadd_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \
+ {"sqdecb_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdecb_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdecd_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdecd_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdecd_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"sqdech_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdech_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdech_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"sqdecp_r_p_r_sx"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"sqdecp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"sqdecp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"sqdecw_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdecw_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqdecw_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"sqincb_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqincb_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqincd_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqincd_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqincd_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"sqinch_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqinch_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqinch_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"sqincp_r_p_r_sx"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"sqincp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"sqincp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"sqincw_r_rs_sx"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqincw_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"sqincw_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"sqsub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \
+ {"sqsub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \
+ {"st1b_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \
+ {"st1b_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \
+ {"st1b_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \
+ {"st1b_z_p_br"_h, \
+ &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \
+ {"st1b_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \
+ {"st1b_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"st1b_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \
+ {"st1d_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \
+ {"st1d_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \
+ {"st1d_z_p_br"_h, \
+ &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \
+ {"st1d_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \
+ {"st1d_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \
+ {"st1d_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \
+ {"st1d_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"st1h_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \
+ {"st1h_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \
+ {"st1h_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \
+ {"st1h_z_p_br"_h, \
+ &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \
+ {"st1h_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \
+ {"st1h_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \
+ {"st1h_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \
+ {"st1h_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"st1h_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets}, \
+ {"st1h_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \
+ {"st1w_z_p_ai_d"_h, \
+ &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \
+ {"st1w_z_p_ai_s"_h, \
+ &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \
+ {"st1w_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \
+ {"st1w_z_p_br"_h, \
+ &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \
+ {"st1w_z_p_bz_d_64_scaled"_h, \
+ &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \
+ {"st1w_z_p_bz_d_64_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \
+ {"st1w_z_p_bz_d_x32_scaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \
+ {"st1w_z_p_bz_d_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \
+ {"st1w_z_p_bz_s_x32_scaled"_h, \
+ &VISITORCLASS::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets}, \
+ {"st1w_z_p_bz_s_x32_unscaled"_h, \
+ &VISITORCLASS:: \
+ VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \
+ {"st2b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st2b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st2d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st2d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st2h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st2h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st2w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st2w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st3b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st3b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st3d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st3d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st3h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st3h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st3w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st3w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st4b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st4b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st4d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st4d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st4h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st4h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"st4w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \
+ {"st4w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \
+ {"stnt1b_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \
+ {"stnt1b_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \
+ {"stnt1d_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \
+ {"stnt1d_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \
+ {"stnt1h_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \
+ {"stnt1h_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \
+ {"stnt1w_z_p_bi_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \
+ {"stnt1w_z_p_br_contiguous"_h, \
+ &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \
+ {"str_p_bi"_h, &VISITORCLASS::VisitSVEStorePredicateRegister}, \
+ {"str_z_bi"_h, &VISITORCLASS::VisitSVEStoreVectorRegister}, \
+ {"subr_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \
+ {"subr_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \
+ {"sub_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \
+ {"sub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \
+ {"sub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \
+ {"sunpkhi_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \
+ {"sunpklo_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \
+ {"sxtb_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"sxth_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"sxtw_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"tbl_z_zz_1"_h, &VISITORCLASS::VisitSVETableLookup}, \
+ {"trn1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \
+ {"trn1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \
+ {"trn2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \
+ {"trn2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \
+ {"uabd_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \
+ {"uaddv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"ucvtf_z_p_z_h2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"ucvtf_z_p_z_w2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"ucvtf_z_p_z_w2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"ucvtf_z_p_z_w2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"ucvtf_z_p_z_x2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"ucvtf_z_p_z_x2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"ucvtf_z_p_z_x2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \
+ {"udf_only_perm_undef"_h, &VISITORCLASS::VisitReserved}, \
+ {"udivr_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \
+ {"udiv_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \
+ {"udot_z_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddUnpredicated}, \
+ {"udot_z_zzzi_d"_h, &VISITORCLASS::VisitSVEMulIndex}, \
+ {"udot_z_zzzi_s"_h, &VISITORCLASS::VisitSVEMulIndex}, \
+ {"umaxv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"umax_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \
+ {"umax_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \
+ {"uminv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \
+ {"umin_z_p_zz"_h, \
+ &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \
+ {"umin_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \
+ {"umulh_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \
+ {"uqadd_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \
+ {"uqadd_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \
+ {"uqdecb_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdecb_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdecd_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdecd_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdecd_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"uqdech_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdech_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdech_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"uqdecp_r_p_r_uw"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"uqdecp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"uqdecp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"uqdecw_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdecw_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqdecw_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"uqincb_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqincb_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqincd_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqincd_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqincd_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"uqinch_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqinch_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqinch_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"uqincp_r_p_r_uw"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"uqincp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"uqincp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \
+ {"uqincw_r_rs_uw"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqincw_r_rs_x"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \
+ {"uqincw_z_zs"_h, \
+ &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \
+ {"uqsub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \
+ {"uqsub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \
+ {"uunpkhi_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \
+ {"uunpklo_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \
+ {"uxtb_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"uxth_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"uxtw_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \
+ {"uzp1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \
+ {"uzp1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \
+ {"uzp2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \
+ {"uzp2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \
+ {"whilele_p_p_rr"_h, \
+ &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \
+ {"whilelo_p_p_rr"_h, \
+ &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \
+ {"whilels_p_p_rr"_h, \
+ &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \
+ {"whilelt_p_p_rr"_h, \
+ &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \
+ {"wrffr_f_p"_h, &VISITORCLASS::VisitSVEFFRWriteFromPredicate}, \
+ {"zip1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \
+ {"zip1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \
+ {"zip2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \
+ {"zip2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \
+ {"adds_32s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"adds_64s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"add_32_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"add_64_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"subs_32s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"subs_64s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"sub_32_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"sub_64_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \
+ {"adds_32s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"adds_64s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"add_32_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"add_64_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"subs_32s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"subs_64s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"sub_32_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"sub_64_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \
+ {"adds_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"adds_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"add_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"add_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"subs_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"subs_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"sub_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"sub_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \
+ {"adcs_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"adcs_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"adc_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"adc_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"sbcs_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"sbcs_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"sbc_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"sbc_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \
+ {"ldaddab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldadda_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldadda_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaddl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldadd_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldadd_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaprb_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldaprh_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldapr_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldapr_64l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclralb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclralh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclral_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclral_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclra_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclra_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclrl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclr_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldclr_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeoralb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeoralh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeoral_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeoral_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeora_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeora_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeorl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeor_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldeor_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldseta_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldseta_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldseth_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsetl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldset_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldset_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmaxl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmax_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmax_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmina_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmina_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsminl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmin_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldsmin_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumaxl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumax_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumax_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumina_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumina_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"lduminl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumin_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"ldumin_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swph_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swplb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swplh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swpl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swp_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"swp_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \
+ {"bfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \
+ {"bfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \
+ {"sbfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \
+ {"sbfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \
+ {"ubfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \
+ {"ubfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \
+ {"cbnz_32_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \
+ {"cbnz_64_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \
+ {"cbz_32_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \
+ {"cbz_64_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \
+ {"b_only_condbranch"_h, &VISITORCLASS::VisitConditionalBranch}, \
+ {"ccmn_32_condcmp_imm"_h, \
+ &VISITORCLASS::VisitConditionalCompareImmediate}, \
+ {"ccmn_64_condcmp_imm"_h, \
+ &VISITORCLASS::VisitConditionalCompareImmediate}, \
+ {"ccmp_32_condcmp_imm"_h, \
+ &VISITORCLASS::VisitConditionalCompareImmediate}, \
+ {"ccmp_64_condcmp_imm"_h, \
+ &VISITORCLASS::VisitConditionalCompareImmediate}, \
+ {"ccmn_32_condcmp_reg"_h, \
+ &VISITORCLASS::VisitConditionalCompareRegister}, \
+ {"ccmn_64_condcmp_reg"_h, \
+ &VISITORCLASS::VisitConditionalCompareRegister}, \
+ {"ccmp_32_condcmp_reg"_h, \
+ &VISITORCLASS::VisitConditionalCompareRegister}, \
+ {"ccmp_64_condcmp_reg"_h, \
+ &VISITORCLASS::VisitConditionalCompareRegister}, \
+ {"csel_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"csel_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"csinc_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"csinc_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"csinv_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"csinv_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"csneg_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"csneg_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \
+ {"sha1h_ss_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \
+ {"sha1su1_vv_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \
+ {"sha256su0_vv_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \
+ {"sha1c_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \
+ {"sha1m_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \
+ {"sha1p_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \
+ {"sha1su0_vvv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \
+ {"sha256h2_qqv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \
+ {"sha256h_qqv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \
+ {"sha256su1_vvv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \
+ {"aesd_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \
+ {"aese_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \
+ {"aesimc_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \
+ {"aesmc_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \
+ {"autda_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"autdb_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"autdza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"autdzb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"autia_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"autib_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"autiza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"autizb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"cls_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"cls_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"clz_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"clz_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"pacda_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"pacdb_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"pacdza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"pacdzb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"pacia_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"pacib_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"paciza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"pacizb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"rbit_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"rbit_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"rev16_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"rev16_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"rev32_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"rev_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"rev_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"xpacd_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"xpaci_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \
+ {"asrv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"asrv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32b_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32cb_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32ch_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32cw_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32cx_64c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32h_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32w_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"crc32x_64c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"gmi_64g_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"irg_64i_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"lslv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"lslv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"lsrv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"lsrv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"pacga_64p_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"rorv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"rorv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"sdiv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"sdiv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"udiv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"udiv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \
+ {"madd_32a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"madd_64a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"msub_32a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"msub_64a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"smaddl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"smsubl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"smulh_64_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"umaddl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"umsubl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"umulh_64_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \
+ {"setf16_only_setf"_h, &VISITORCLASS::VisitEvaluateIntoFlags}, \
+ {"setf8_only_setf"_h, &VISITORCLASS::VisitEvaluateIntoFlags}, \
+ {"brk_ex_exception"_h, &VISITORCLASS::VisitException}, \
+ {"dcps1_dc_exception"_h, &VISITORCLASS::VisitException}, \
+ {"dcps2_dc_exception"_h, &VISITORCLASS::VisitException}, \
+ {"dcps3_dc_exception"_h, &VISITORCLASS::VisitException}, \
+ {"hlt_ex_exception"_h, &VISITORCLASS::VisitException}, \
+ {"hvc_ex_exception"_h, &VISITORCLASS::VisitException}, \
+ {"smc_ex_exception"_h, &VISITORCLASS::VisitException}, \
+ {"svc_ex_exception"_h, &VISITORCLASS::VisitException}, \
+ {"extr_32_extract"_h, &VISITORCLASS::VisitExtract}, \
+ {"extr_64_extract"_h, &VISITORCLASS::VisitExtract}, \
+ {"fcmpe_dz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmpe_d_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmpe_hz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmpe_h_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmpe_sz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmpe_s_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmp_dz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmp_d_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmp_hz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmp_h_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmp_sz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fcmp_s_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \
+ {"fccmpe_d_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \
+ {"fccmpe_h_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \
+ {"fccmpe_s_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \
+ {"fccmp_d_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \
+ {"fccmp_h_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \
+ {"fccmp_s_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \
+ {"fcsel_d_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \
+ {"fcsel_h_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \
+ {"fcsel_s_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \
+ {"bfcvt_bs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fabs_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fabs_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fabs_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fcvt_dh_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fcvt_ds_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fcvt_hd_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fcvt_hs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fcvt_sd_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fcvt_sh_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fmov_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fmov_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fmov_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fneg_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fneg_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fneg_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint32x_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint32x_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint32z_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint32z_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint64x_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint64x_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint64z_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frint64z_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frinta_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frinta_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frinta_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frinti_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frinti_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frinti_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintm_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintm_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintm_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintn_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintn_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintn_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintp_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintp_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintp_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintx_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintx_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintx_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintz_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintz_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"frintz_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fsqrt_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fsqrt_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fsqrt_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \
+ {"fadd_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fadd_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fadd_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fdiv_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fdiv_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fdiv_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmaxnm_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmaxnm_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmaxnm_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmax_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmax_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmax_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fminnm_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fminnm_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fminnm_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmin_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmin_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmin_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmul_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmul_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmul_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fnmul_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fnmul_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fnmul_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fsub_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fsub_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fsub_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \
+ {"fmadd_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fmadd_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fmadd_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fmsub_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fmsub_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fmsub_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fnmadd_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fnmadd_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fnmadd_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fnmsub_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fnmsub_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fnmsub_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \
+ {"fcvtzs_32d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzs_32h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzs_32s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzs_64d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzs_64h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzs_64s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzu_32d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzu_32h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzu_32s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzu_64d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzu_64h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fcvtzu_64s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"scvtf_d32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"scvtf_d64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"scvtf_h32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"scvtf_h64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"scvtf_s32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"scvtf_s64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"ucvtf_d32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"ucvtf_d64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"ucvtf_h32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"ucvtf_h64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"ucvtf_s32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"ucvtf_s64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \
+ {"fmov_d_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \
+ {"fmov_h_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \
+ {"fmov_s_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \
+ {"fcvtas_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtas_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtas_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtas_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtas_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtas_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtau_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtau_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtau_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtau_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtau_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtau_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtms_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtms_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtms_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtms_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtms_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtms_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtmu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtmu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtmu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtmu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtmu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtmu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtns_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtns_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtns_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtns_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtns_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtns_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtnu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtnu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtnu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtnu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtnu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtnu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtps_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtps_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtps_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtps_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtps_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtps_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtpu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtpu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtpu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtpu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtpu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtpu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzs_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzs_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzs_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzs_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzs_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzs_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fcvtzu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fjcvtzs_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_64vx_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"fmov_v64i_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"scvtf_d32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"scvtf_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"scvtf_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"scvtf_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"scvtf_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"scvtf_s64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"ucvtf_d32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"ucvtf_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"ucvtf_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"ucvtf_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"ucvtf_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"ucvtf_s64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \
+ {"ldrsw_64_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \
+ {"ldr_32_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \
+ {"ldr_64_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \
+ {"ldr_d_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \
+ {"ldr_q_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \
+ {"ldr_s_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \
+ {"prfm_p_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \
+ {"casab_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casah_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casalb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casalh_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casal_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casal_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casa_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casa_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"cash_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caslb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caslh_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casl_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casl_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caspal_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caspal_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caspa_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caspa_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caspl_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"caspl_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casp_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"casp_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"cas_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"cas_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldarb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldarh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldar_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldar_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldaxp_lp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldaxp_lp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldaxrb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldaxrh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldaxr_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldaxr_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldlarb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldlarh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldlar_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldlar_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldxp_lp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldxp_lp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldxrb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldxrh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldxr_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldxr_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stllrb_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stllrh_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stllr_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stllr_sl64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlrb_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlrh_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlr_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlr_sl64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlxp_sp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlxp_sp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlxrb_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlxrh_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlxr_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stlxr_sr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stxp_sp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stxp_sp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stxrb_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stxrh_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stxr_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"stxr_sr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \
+ {"ldraa_64w_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \
+ {"ldraa_64_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \
+ {"ldrab_64w_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \
+ {"ldrab_64_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \
+ {"ldnp_32_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"ldnp_64_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"ldnp_d_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"ldnp_q_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"ldnp_s_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"stnp_32_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"stnp_64_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"stnp_d_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"stnp_q_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"stnp_s_ldstnapair_offs"_h, \
+ &VISITORCLASS::VisitLoadStorePairNonTemporal}, \
+ {"ldpsw_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"ldp_32_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"ldp_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"ldp_d_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"ldp_q_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"ldp_s_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"stp_32_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"stp_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"stp_d_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"stp_q_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"stp_s_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \
+ {"ldpsw_64_ldstpair_post"_h, \
+ &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"ldp_32_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"ldp_64_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"ldp_d_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"ldp_q_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"ldp_s_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"stp_32_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"stp_64_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"stp_d_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"stp_q_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"stp_s_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \
+ {"ldpsw_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"ldp_32_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"ldp_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"ldp_d_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"ldp_q_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"ldp_s_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"stp_32_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"stp_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"stp_d_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"stp_q_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"stp_s_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \
+ {"ldrb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldrh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldrsb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldrsb_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldrsh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldrsh_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldrsw_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldr_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldr_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldr_b_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldr_d_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldr_h_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldr_q_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldr_s_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"strb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"strh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"str_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"str_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"str_b_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"str_d_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"str_h_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"str_q_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"str_s_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \
+ {"ldrb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldrh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldrsb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldrsb_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldrsh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldrsh_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldrsw_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldr_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldr_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldr_b_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldr_d_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldr_h_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldr_q_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldr_s_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"strb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"strh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"str_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"str_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"str_b_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"str_d_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"str_h_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"str_q_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"str_s_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \
+ {"ldapurb_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapurh_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapursb_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapursb_64_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapursh_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapursh_64_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapursw_64_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapur_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldapur_64_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"stlurb_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"stlurh_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"stlur_32_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"stlur_64_ldapstl_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \
+ {"ldrb_32bl_ldst_regoff"_h, \
+ &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrb_32b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrsb_32bl_ldst_regoff"_h, \
+ &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrsb_32b_ldst_regoff"_h, \
+ &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrsb_64bl_ldst_regoff"_h, \
+ &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrsb_64b_ldst_regoff"_h, \
+ &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrsh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrsh_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldrsw_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_bl_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_d_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_h_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_q_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldr_s_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"prfm_p_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"strb_32bl_ldst_regoff"_h, \
+ &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"strb_32b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"strh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_bl_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_d_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_h_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_q_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"str_s_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \
+ {"ldurb_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldurh_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldursb_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldursb_64_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldursh_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldursh_64_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldursw_64_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldur_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldur_64_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldur_b_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldur_d_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldur_h_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldur_q_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldur_s_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"prfum_p_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"sturb_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"sturh_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"stur_32_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"stur_64_ldst_unscaled"_h, \
+ &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"stur_b_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"stur_d_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"stur_h_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"stur_q_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"stur_s_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \
+ {"ldrb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldrh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldrsb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldrsb_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldrsh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldrsh_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldrsw_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldr_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldr_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldr_b_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldr_d_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldr_h_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldr_q_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ldr_s_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"prfm_p_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"strb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"strh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"str_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"str_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"str_b_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"str_d_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"str_h_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"str_q_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"str_s_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \
+ {"ands_32s_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"ands_64s_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"and_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"and_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"eor_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"eor_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"orr_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"orr_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \
+ {"ands_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"ands_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"and_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"and_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"bics_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"bics_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"bic_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"bic_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"eon_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"eon_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"eor_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"eor_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"orn_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"orn_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"orr_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"orr_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \
+ {"movk_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \
+ {"movk_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \
+ {"movn_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \
+ {"movn_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \
+ {"movz_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \
+ {"movz_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \
+ {"fabs_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcmeq_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcmge_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcmgt_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcmle_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcmlt_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtas_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtau_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtms_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtmu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtns_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtnu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtps_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtpu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtzs_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fcvtzu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fneg_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frecpe_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frinta_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frinti_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frintm_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frintn_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frintp_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frintx_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frintz_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"frsqrte_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"fsqrt_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"scvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"ucvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \
+ {"addhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"pmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"raddhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"rsubhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"sabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"sabdl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"saddl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"saddw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"smlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"smlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"smull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"sqdmlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"sqdmlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"sqdmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"ssubl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"ssubw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"subhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"uabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"uabdl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"uaddl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"uaddw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"umlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"umlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"umull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"usubl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"usubw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \
+ {"addp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"add_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"cmeq_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"cmge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"cmgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"cmhi_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"cmhs_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"cmtst_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"facge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"facgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"faddp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fcmeq_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fcmge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fcmgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fdiv_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmaxnmp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmaxnm_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fminnmp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fminnm_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmla_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmls_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmulx_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"frecps_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"frsqrts_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sqadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sqdmulh_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sqrdmulh_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sqrshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sqshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sqsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"srshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uqadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uqrshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uqshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uqsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"urshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"ushl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fcadd_asimdsame2_c"_h, &VISITORCLASS::VisitNEON3SameExtra}, \
+ {"fcmla_asimdsame2_c"_h, &VISITORCLASS::VisitNEON3SameExtra}, \
+ {"sdot_asimdsame2_d"_h, &VISITORCLASS::VisitNEON3SameExtra}, \
+ {"sqrdmlah_asimdsame2_only"_h, &VISITORCLASS::VisitNEON3SameExtra}, \
+ {"sqrdmlsh_asimdsame2_only"_h, &VISITORCLASS::VisitNEON3SameExtra}, \
+ {"udot_asimdsame2_d"_h, &VISITORCLASS::VisitNEON3SameExtra}, \
+ {"fabd_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"facge_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"facgt_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"faddp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fadd_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fcmeq_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fcmge_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fcmgt_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fdiv_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmaxnmp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmaxnm_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmaxp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmax_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fminnmp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fminnm_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fminp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmin_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmla_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmls_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmulx_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fmul_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"frecps_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"frsqrts_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"fsub_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \
+ {"addv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"saddlv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"smaxv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"sminv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"uaddlv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"umaxv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"uminv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"mla_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \
+ {"mls_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \
+ {"mul_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \
+ {"sqdmulh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \
+ {"sqrdmlah_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \
+ {"sqrdmlsh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \
+ {"sqrdmulh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \
+ {"dup_asimdins_dr_r"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"dup_asimdins_dv_v"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"ins_asimdins_ir_r"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"ins_asimdins_iv_v"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"smov_asimdins_w_w"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"smov_asimdins_x_x"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"umov_asimdins_w_w"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"umov_asimdins_x_x"_h, &VISITORCLASS::VisitNEONCopy}, \
+ {"ext_asimdext_only"_h, &VISITORCLASS::VisitNEONExtract}, \
+ {"ld1_asisdlse_r1_1v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"ld1_asisdlse_r2_2v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"ld1_asisdlse_r3_3v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"ld1_asisdlse_r4_4v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"ld2_asisdlse_r2"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"ld3_asisdlse_r3"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"ld4_asisdlse_r4"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"st1_asisdlse_r1_1v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"st1_asisdlse_r2_2v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"st1_asisdlse_r3_3v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"st1_asisdlse_r4_4v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"st2_asisdlse_r2"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"st3_asisdlse_r3"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"st4_asisdlse_r4"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \
+ {"ld1_asisdlsep_i1_i1"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1_asisdlsep_i2_i2"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1_asisdlsep_i3_i3"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1_asisdlsep_i4_i4"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1_asisdlsep_r1_r1"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1_asisdlsep_r2_r2"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1_asisdlsep_r3_r3"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1_asisdlsep_r4_r4"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld2_asisdlsep_i2_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld2_asisdlsep_r2_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld3_asisdlsep_i3_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld3_asisdlsep_r3_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld4_asisdlsep_i4_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld4_asisdlsep_r4_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_i1_i1"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_i2_i2"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_i3_i3"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_i4_i4"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_r1_r1"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_r2_r2"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_r3_r3"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st1_asisdlsep_r4_r4"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st2_asisdlsep_i2_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st2_asisdlsep_r2_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st3_asisdlsep_i3_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st3_asisdlsep_r3_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st4_asisdlsep_i4_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"st4_asisdlsep_r4_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \
+ {"ld1r_asisdlso_r1"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld1_asisdlso_b1_1b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld1_asisdlso_d1_1d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld1_asisdlso_h1_1h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld1_asisdlso_s1_1s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld2r_asisdlso_r2"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld2_asisdlso_b2_2b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld2_asisdlso_d2_2d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld2_asisdlso_h2_2h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld2_asisdlso_s2_2s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld3r_asisdlso_r3"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld3_asisdlso_b3_3b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld3_asisdlso_d3_3d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld3_asisdlso_h3_3h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld3_asisdlso_s3_3s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld4r_asisdlso_r4"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld4_asisdlso_b4_4b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld4_asisdlso_d4_4d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld4_asisdlso_h4_4h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld4_asisdlso_s4_4s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st1_asisdlso_b1_1b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st1_asisdlso_d1_1d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st1_asisdlso_h1_1h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st1_asisdlso_s1_1s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st2_asisdlso_b2_2b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st2_asisdlso_d2_2d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st2_asisdlso_h2_2h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st2_asisdlso_s2_2s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st3_asisdlso_b3_3b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st3_asisdlso_d3_3d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st3_asisdlso_h3_3h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st3_asisdlso_s3_3s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st4_asisdlso_b4_4b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st4_asisdlso_d4_4d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st4_asisdlso_h4_4h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"st4_asisdlso_s4_4s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \
+ {"ld1r_asisdlsop_r1_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1r_asisdlsop_rx1_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_b1_i1b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_bx1_r1b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_d1_i1d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_dx1_r1d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_h1_i1h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_hx1_r1h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_s1_i1s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld1_asisdlsop_sx1_r1s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2r_asisdlsop_r2_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2r_asisdlsop_rx2_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_b2_i2b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_bx2_r2b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_d2_i2d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_dx2_r2d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_h2_i2h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_hx2_r2h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_s2_i2s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld2_asisdlsop_sx2_r2s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3r_asisdlsop_r3_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3r_asisdlsop_rx3_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_b3_i3b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_bx3_r3b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_d3_i3d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_dx3_r3d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_h3_i3h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_hx3_r3h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_s3_i3s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld3_asisdlsop_sx3_r3s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4r_asisdlsop_r4_i"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4r_asisdlsop_rx4_r"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_b4_i4b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_bx4_r4b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_d4_i4d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_dx4_r4d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_h4_i4h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_hx4_r4h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_s4_i4s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"ld4_asisdlsop_sx4_r4s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_b1_i1b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_bx1_r1b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_d1_i1d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_dx1_r1d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_h1_i1h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_hx1_r1h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_s1_i1s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st1_asisdlsop_sx1_r1s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_b2_i2b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_bx2_r2b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_d2_i2d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_dx2_r2d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_h2_i2h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_hx2_r2h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_s2_i2s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st2_asisdlsop_sx2_r2s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_b3_i3b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_bx3_r3b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_d3_i3d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_dx3_r3d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_h3_i3h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_hx3_r3h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_s3_i3s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st3_asisdlsop_sx3_r3s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_b4_i4b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_bx4_r4b"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_d4_i4d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_dx4_r4d"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_h4_i4h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_hx4_r4h"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_s4_i4s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"st4_asisdlsop_sx4_r4s"_h, \
+ &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \
+ {"bic_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"bic_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"fmov_asimdimm_d2_d"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"fmov_asimdimm_h_h"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"fmov_asimdimm_s_s"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"movi_asimdimm_d2_d"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"movi_asimdimm_d_ds"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"movi_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"movi_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"movi_asimdimm_m_sm"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"movi_asimdimm_n_b"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"mvni_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"mvni_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"mvni_asimdimm_m_sm"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"orr_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"orr_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \
+ {"trn1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \
+ {"trn2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \
+ {"uzp1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \
+ {"uzp2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \
+ {"zip1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \
+ {"zip2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \
+ {"sqabs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"sqneg_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"sqxtn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"sqxtun_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"suqadd_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"uqxtn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"usqadd_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcmeq_asisdmiscfp16_fz"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcmge_asisdmiscfp16_fz"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcmgt_asisdmiscfp16_fz"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcmle_asisdmiscfp16_fz"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcmlt_asisdmiscfp16_fz"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtas_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtau_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtms_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtmu_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtns_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtnu_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtps_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtpu_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtzs_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"fcvtzu_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"frecpe_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"frecpx_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"frsqrte_asisdmiscfp16_r"_h, \
+ &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"scvtf_asisdmiscfp16_r"_h, &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"ucvtf_asisdmiscfp16_r"_h, &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \
+ {"sqdmlal_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \
+ {"sqdmlsl_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \
+ {"sqdmull_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \
+ {"sqadd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sqdmulh_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sqrdmulh_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sqrshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sqshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sqsub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"srshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"uqadd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"uqrshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"uqshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"uqsub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"urshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"ushl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"fabd_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"facge_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"facgt_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"fcmeq_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"fcmge_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"fcmgt_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"fmulx_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"frecps_asisdsamefp16_only"_h, \
+ &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"frsqrts_asisdsamefp16_only"_h, \
+ &VISITORCLASS::VisitNEONScalar3SameFP16}, \
+ {"sqdmulh_asisdelem_r"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"sqrdmlah_asisdelem_r"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"sqrdmlsh_asisdelem_r"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"sqrdmulh_asisdelem_r"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"dup_asisdone_only"_h, &VISITORCLASS::VisitNEONScalarCopy}, \
+ {"addp_asisdpair_only"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"faddp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"faddp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fmaxnmp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fmaxnmp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fmaxp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fmaxp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fminnmp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fminnmp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fminp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fminp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \
+ {"fcvtzs_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"fcvtzu_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"scvtf_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sqshlu_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sqshl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"ucvtf_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"uqshl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sqshlu_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sqshl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"uqshl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"shl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sli_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"tbl_asimdtbl_l1_1"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"tbl_asimdtbl_l2_2"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"tbl_asimdtbl_l3_3"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"tbl_asimdtbl_l4_4"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"tbx_asimdtbl_l1_1"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"tbx_asimdtbl_l2_2"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"tbx_asimdtbl_l3_3"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"tbx_asimdtbl_l4_4"_h, &VISITORCLASS::VisitNEONTable}, \
+ {"adrp_only_pcreladdr"_h, &VISITORCLASS::VisitPCRelAddressing}, \
+ {"adr_only_pcreladdr"_h, &VISITORCLASS::VisitPCRelAddressing}, \
+ {"rmif_only_rmif"_h, &VISITORCLASS::VisitRotateRightIntoFlags}, \
+ {"bti_hb_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"clrex_bn_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"dmb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"dsb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"hint_hm_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"mrs_rs_systemmove"_h, &VISITORCLASS::VisitSystem}, \
+ {"msr_si_pstate"_h, &VISITORCLASS::VisitSystem}, \
+ {"msr_sr_systemmove"_h, &VISITORCLASS::VisitSystem}, \
+ {"psb_hc_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"sb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"sysl_rc_systeminstrs"_h, &VISITORCLASS::VisitSystem}, \
+ {"sys_cr_systeminstrs"_h, &VISITORCLASS::VisitSystem}, \
+ {"tcommit_only_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"tsb_hc_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"tbnz_only_testbranch"_h, &VISITORCLASS::VisitTestBranch}, \
+ {"tbz_only_testbranch"_h, &VISITORCLASS::VisitTestBranch}, \
+ {"bl_only_branch_imm"_h, &VISITORCLASS::VisitUnconditionalBranch}, \
+ {"b_only_branch_imm"_h, &VISITORCLASS::VisitUnconditionalBranch}, \
+ {"blraaz_64_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"blraa_64p_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"blrabz_64_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"blrab_64p_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"blr_64_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"braaz_64_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"braa_64p_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"brabz_64_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"brab_64p_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"br_64_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"drps_64e_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"eretaa_64e_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"eretab_64e_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"eret_64e_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"retaa_64e_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"retab_64e_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"ret_64r_branch_reg"_h, \
+ &VISITORCLASS::VisitUnconditionalBranchToRegister}, \
+ {"addg_64_addsub_immtags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bcax_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfcvtn_asimdmisc_4s"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfdot_asimdelem_e"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfdot_asimdsame2_d"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmlal_asimdelem_f"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmlal_asimdsame2_f_"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmmla_asimdsame2_e"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"dsb_bon_barriers"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"eor3_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ld64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldg_64loffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtrsb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtrsb_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtrsh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtrsh_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtrsw_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ldtr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"rax1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sha512h2_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sha512h_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sha512su0_vv2_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sha512su1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm3partw1_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm3partw2_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm3ss1_vvv4_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm3tt1a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"st2g_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"st2g_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"st2g_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"st64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"st64bv_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"st64bv0_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stgp_64_ldstpair_off"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stgp_64_ldstpair_post"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stgp_64_ldstpair_pre"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stg_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stg_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stg_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sttrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sttrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sttr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sttr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stz2g_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stz2g_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stz2g_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stzgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stzg_64soffset_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stzg_64spost_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"stzg_64spre_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"subg_64_addsub_immtags"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"subps_64s_dp_2src"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"subp_64s_dp_2src"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"tcancel_ex_exception"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"tstart_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"ttest_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"wfet_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"wfit_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"xar_vvv2_crypto3_imm6"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfcvt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfcvtnt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfdot_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfdot_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmlalb_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmlalb_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmlalt_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmlalt_z_zzzi_"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"bfmmla_z_zzz_"_h, &VISITORCLASS::VisitUnimplemented}, { \
+ "unallocated"_h, &VISITORCLASS::VisitUnallocated \
+ }
+
+#define SIM_AUD_VISITOR_MAP(VISITORCLASS) \
+ {"autia1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"autiasp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"autiaz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"autib1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"autibsp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"autibz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"axflag_m_pstate"_h, &VISITORCLASS::VisitSystem}, \
+ {"cfinv_m_pstate"_h, &VISITORCLASS::VisitSystem}, \
+ {"csdb_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"dgh_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"esb_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"isb_bi_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"nop_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"pacia1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"paciasp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"paciaz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"pacib1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"pacibsp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"pacibz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"pssbb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"sev_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"sevl_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"ssbb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \
+ {"wfe_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"wfi_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"xaflag_m_pstate"_h, &VISITORCLASS::VisitSystem}, \
+ {"xpaclri_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"yield_hi_hints"_h, &VISITORCLASS::VisitSystem}, \
+ {"abs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"cls_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"clz_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"cmeq_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"cmge_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"cmgt_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"cmle_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"cmlt_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"cnt_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fabs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcmeq_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcmge_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcmgt_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcmle_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcmlt_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtas_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtau_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtl_asimdmisc_l"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtms_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtmu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtns_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtnu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtps_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtpu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtxn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtzs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fcvtzu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fneg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frecpe_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frint32x_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frint32z_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frint64x_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frint64z_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frinta_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frinti_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frintm_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frintn_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frintp_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frintx_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frintz_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"frsqrte_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"fsqrt_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"neg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"not_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"rbit_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"rev16_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"rev32_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"rev64_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"sadalp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"saddlp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"scvtf_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"shll_asimdmisc_s"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"sqabs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"sqneg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"sqxtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"sqxtun_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"suqadd_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"uadalp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"uaddlp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"ucvtf_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"uqxtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"urecpe_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"ursqrte_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"usqadd_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"xtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \
+ {"mla_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"mls_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"mul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"saba_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"shadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"shsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"smaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"smax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"sminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"smin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"srhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uaba_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uhsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"umaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"umax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"uminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"umin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"urhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"and_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"bic_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"bif_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"bit_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"bsl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"eor_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"orr_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"orn_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"pmul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmlal2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmlal_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmlsl2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"fmlsl_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \
+ {"ushll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sshll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"shrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"rshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sqshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sqrshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sqshrun_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sqrshrun_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"uqshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"uqrshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sri_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"srshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"srsra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"ssra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"urshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"ursra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"ushr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"usra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"scvtf_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"ucvtf_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"fcvtzs_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"fcvtzu_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \
+ {"sqdmlal_asisdelem_l"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"sqdmlsl_asisdelem_l"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"sqdmull_asisdelem_l"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmla_asisdelem_rh_h"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmla_asisdelem_r_sd"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmls_asisdelem_rh_h"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmls_asisdelem_r_sd"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmulx_asisdelem_rh_h"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmulx_asisdelem_r_sd"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmul_asisdelem_rh_h"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fmul_asisdelem_r_sd"_h, \
+ &VISITORCLASS::VisitNEONScalarByIndexedElement}, \
+ {"fabd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"facge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"facgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"fcmeq_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"fcmge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"fcmgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"fmulx_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"frecps_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"frsqrts_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"cmeq_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"cmge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"cmgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"cmhi_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"cmhs_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"cmtst_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"add_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \
+ {"sqrdmlah_asisdsame2_only"_h, \
+ &VISITORCLASS::VisitNEONScalar3SameExtra}, \
+ {"sqrdmlsh_asisdsame2_only"_h, \
+ &VISITORCLASS::VisitNEONScalar3SameExtra}, \
+ {"fmaxnmv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"fmaxv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"fminnmv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"fminv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"fmaxnmv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"fminnmv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"fmaxv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"fminv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \
+ {"shl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sli_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sri_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"srshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"srsra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"ssra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"urshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"ursra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"ushr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"usra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sqrshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sqrshrun_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sqshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"sqshrun_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"uqrshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"uqshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \
+ {"cmeq_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"cmge_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"cmgt_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"cmle_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"cmlt_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"abs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"neg_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcmeq_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcmge_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcmgt_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcmle_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcmlt_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtas_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtau_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtms_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtmu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtns_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtnu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtps_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtpu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtxn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtzs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"fcvtzu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"frecpe_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"frecpx_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"frsqrte_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \
+ {"scvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, { \
+ "ucvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc \
+ }
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 83ce3174..3d3e5fd6 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -34,6 +34,648 @@ namespace vixl {
namespace aarch64 {
+const Disassembler::FormToVisitorFnMap *Disassembler::GetFormToVisitorFnMap() {
+ static const FormToVisitorFnMap form_to_visitor = {
+ DEFAULT_FORM_TO_VISITOR_MAP(Disassembler),
+ {"autia1716_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"autiasp_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"autiaz_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"autib1716_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"autibsp_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"autibz_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"axflag_m_pstate"_h, &Disassembler::DisassembleNoArgs},
+ {"cfinv_m_pstate"_h, &Disassembler::DisassembleNoArgs},
+ {"csdb_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"dgh_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"ssbb_only_barriers"_h, &Disassembler::DisassembleNoArgs},
+ {"pssbb_only_barriers"_h, &Disassembler::DisassembleNoArgs},
+ {"esb_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"isb_bi_barriers"_h, &Disassembler::DisassembleNoArgs},
+ {"nop_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"pacia1716_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"paciasp_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"paciaz_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"pacib1716_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"pacibsp_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"pacibz_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"sev_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"sevl_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"wfe_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"wfi_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"xaflag_m_pstate"_h, &Disassembler::DisassembleNoArgs},
+ {"xpaclri_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"yield_hi_hints"_h, &Disassembler::DisassembleNoArgs},
+ {"abs_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"cls_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"clz_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"cnt_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"neg_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"rev16_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"rev32_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"rev64_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"sqabs_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"sqneg_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"suqadd_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"urecpe_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"ursqrte_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"usqadd_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc},
+ {"not_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegLogical},
+ {"rbit_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegLogical},
+ {"xtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract},
+ {"sqxtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract},
+ {"uqxtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract},
+ {"sqxtun_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract},
+ {"shll_asimdmisc_s"_h, &Disassembler::DisassembleNEON2RegExtract},
+ {"sadalp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp},
+ {"saddlp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp},
+ {"uadalp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp},
+ {"uaddlp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp},
+ {"cmeq_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare},
+ {"cmge_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare},
+ {"cmgt_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare},
+ {"cmle_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare},
+ {"cmlt_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare},
+ {"fcmeq_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare},
+ {"fcmge_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare},
+ {"fcmgt_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare},
+ {"fcmle_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare},
+ {"fcmlt_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare},
+ {"fcvtl_asimdmisc_l"_h, &Disassembler::DisassembleNEON2RegFPConvert},
+ {"fcvtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegFPConvert},
+ {"fcvtxn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegFPConvert},
+ {"fabs_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtas_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtau_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtms_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtmu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtns_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtnu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtps_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtpu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtzs_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fcvtzu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fneg_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frecpe_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frint32x_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frint32z_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frint64x_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frint64z_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frinta_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frinti_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frintm_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frintn_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frintp_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frintx_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frintz_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"frsqrte_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"fsqrt_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"scvtf_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"ucvtf_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP},
+ {"smlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"smlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"smull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"umlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"umlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"umull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"sqdmull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"sqdmlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"sqdmlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong},
+ {"sdot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement},
+ {"udot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement},
+ {"usdot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement},
+ {"sudot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement},
+ {"fmlal2_asimdelem_lh"_h,
+ &Disassembler::DisassembleNEONFPMulByElementLong},
+ {"fmlal_asimdelem_lh"_h,
+ &Disassembler::DisassembleNEONFPMulByElementLong},
+ {"fmlsl2_asimdelem_lh"_h,
+ &Disassembler::DisassembleNEONFPMulByElementLong},
+ {"fmlsl_asimdelem_lh"_h,
+ &Disassembler::DisassembleNEONFPMulByElementLong},
+ {"fcmla_asimdelem_c_h"_h,
+ &Disassembler::DisassembleNEONComplexMulByElement},
+ {"fcmla_asimdelem_c_s"_h,
+ &Disassembler::DisassembleNEONComplexMulByElement},
+ {"fmla_asimdelem_rh_h"_h,
+ &Disassembler::DisassembleNEONHalfFPMulByElement},
+ {"fmls_asimdelem_rh_h"_h,
+ &Disassembler::DisassembleNEONHalfFPMulByElement},
+ {"fmulx_asimdelem_rh_h"_h,
+ &Disassembler::DisassembleNEONHalfFPMulByElement},
+ {"fmul_asimdelem_rh_h"_h,
+ &Disassembler::DisassembleNEONHalfFPMulByElement},
+ {"fmla_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement},
+ {"fmls_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement},
+ {"fmulx_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement},
+ {"fmul_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement},
+ {"mla_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"mls_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"mul_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"saba_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"sabd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"shadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"shsub_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"smaxp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"smax_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"sminp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"smin_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"srhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"uaba_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"uabd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"uhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"uhsub_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"umaxp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"umax_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"uminp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"umin_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"urhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD},
+ {"and_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"bic_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"bif_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"bit_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"bsl_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"eor_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"orr_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"orn_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"pmul_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical},
+ {"fmlal2_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM},
+ {"fmlal_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM},
+ {"fmlsl2_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM},
+ {"fmlsl_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM},
+ {"sri_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"srshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"srsra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"sshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"ssra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"urshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"ursra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"ushr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"usra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"scvtf_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"ucvtf_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"fcvtzs_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"fcvtzu_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm},
+ {"ushll_asimdshf_l"_h, &Disassembler::DisassembleNEONShiftLeftLongImm},
+ {"sshll_asimdshf_l"_h, &Disassembler::DisassembleNEONShiftLeftLongImm},
+ {"shrn_asimdshf_n"_h, &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"rshrn_asimdshf_n"_h, &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"sqshrn_asimdshf_n"_h,
+ &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"sqrshrn_asimdshf_n"_h,
+ &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"sqshrun_asimdshf_n"_h,
+ &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"sqrshrun_asimdshf_n"_h,
+ &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"uqshrn_asimdshf_n"_h,
+ &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"uqrshrn_asimdshf_n"_h,
+ &Disassembler::DisassembleNEONShiftRightNarrowImm},
+ {"sqdmlal_asisdelem_l"_h,
+ &Disassembler::DisassembleNEONScalarSatMulLongIndex},
+ {"sqdmlsl_asisdelem_l"_h,
+ &Disassembler::DisassembleNEONScalarSatMulLongIndex},
+ {"sqdmull_asisdelem_l"_h,
+ &Disassembler::DisassembleNEONScalarSatMulLongIndex},
+ {"fmla_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fmla_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fmls_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fmls_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fmulx_asisdelem_rh_h"_h,
+ &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fmulx_asisdelem_r_sd"_h,
+ &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fmul_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fmul_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex},
+ {"fabd_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"facge_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"facgt_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"fcmeq_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"fcmge_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"fcmgt_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"fmulx_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"frecps_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"frsqrts_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same},
+ {"sqrdmlah_asisdsame2_only"_h, &Disassembler::VisitNEONScalar3Same},
+ {"sqrdmlsh_asisdsame2_only"_h, &Disassembler::VisitNEONScalar3Same},
+ {"cmeq_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"cmge_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"cmgt_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"cmhi_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"cmhs_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"cmtst_asisdsame_only"_h,
+ &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"add_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"sub_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD},
+ {"fmaxnmv_asimdall_only_h"_h,
+ &Disassembler::DisassembleNEONFP16AcrossLanes},
+ {"fmaxv_asimdall_only_h"_h,
+ &Disassembler::DisassembleNEONFP16AcrossLanes},
+ {"fminnmv_asimdall_only_h"_h,
+ &Disassembler::DisassembleNEONFP16AcrossLanes},
+ {"fminv_asimdall_only_h"_h,
+ &Disassembler::DisassembleNEONFP16AcrossLanes},
+ {"fmaxnmv_asimdall_only_sd"_h,
+ &Disassembler::DisassembleNEONFPAcrossLanes},
+ {"fminnmv_asimdall_only_sd"_h,
+ &Disassembler::DisassembleNEONFPAcrossLanes},
+ {"fmaxv_asimdall_only_sd"_h, &Disassembler::DisassembleNEONFPAcrossLanes},
+ {"fminv_asimdall_only_sd"_h, &Disassembler::DisassembleNEONFPAcrossLanes},
+ {"shl_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"sli_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"sri_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"srshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"srsra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"sshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"ssra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"urshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"ursra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"ushr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"usra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD},
+ {"sqrshrn_asisdshf_n"_h,
+ &Disassembler::DisassembleNEONScalarShiftRightNarrowImm},
+ {"sqrshrun_asisdshf_n"_h,
+ &Disassembler::DisassembleNEONScalarShiftRightNarrowImm},
+ {"sqshrn_asisdshf_n"_h,
+ &Disassembler::DisassembleNEONScalarShiftRightNarrowImm},
+ {"sqshrun_asisdshf_n"_h,
+ &Disassembler::DisassembleNEONScalarShiftRightNarrowImm},
+ {"uqrshrn_asisdshf_n"_h,
+ &Disassembler::DisassembleNEONScalarShiftRightNarrowImm},
+ {"uqshrn_asisdshf_n"_h,
+ &Disassembler::DisassembleNEONScalarShiftRightNarrowImm},
+ {"cmeq_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD},
+ {"cmge_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD},
+ {"cmgt_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD},
+ {"cmle_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD},
+ {"cmlt_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD},
+ {"abs_asisdmisc_r"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD},
+ {"neg_asisdmisc_r"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD},
+ {"fcmeq_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcmge_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcmgt_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcmle_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcmlt_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtas_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtau_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtms_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtmu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtns_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtnu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtps_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtpu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtxn_asisdmisc_n"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtzs_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"fcvtzu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"frecpe_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"frecpx_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"frsqrte_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"scvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"ucvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc},
+ {"adclb_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry},
+ {"adclt_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry},
+ {"addhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"addhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"addp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"aesd_z_zz"_h, &Disassembler::Disassemble_ZdnB_ZdnB_ZmB},
+ {"aese_z_zz"_h, &Disassembler::Disassemble_ZdnB_ZdnB_ZmB},
+ {"aesimc_z_z"_h, &Disassembler::Disassemble_ZdnB_ZdnB},
+ {"aesmc_z_z"_h, &Disassembler::Disassemble_ZdnB_ZdnB},
+ {"bcax_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary},
+ {"bdep_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"bext_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"bgrp_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"bsl1n_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary},
+ {"bsl2n_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary},
+ {"bsl_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary},
+ {"cadd_z_zz"_h, &Disassembler::DisassembleSVEComplexIntAddition},
+ {"cdot_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb_const},
+ {"cdot_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const},
+ {"cdot_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const},
+ {"cmla_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT_const},
+ {"cmla_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const},
+ {"cmla_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const},
+ {"eor3_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary},
+ {"eorbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"eortb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"ext_z_zi_con"_h, &Disassembler::Disassemble_ZdB_Zn1B_Zn2B_imm},
+ {"faddp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair},
+ {"fcvtlt_z_p_z_h2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnH},
+ {"fcvtlt_z_p_z_s2d"_h, &Disassembler::Disassemble_ZdD_PgM_ZnS},
+ {"fcvtnt_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD},
+ {"fcvtnt_z_p_z_s2h"_h, &Disassembler::Disassemble_ZdH_PgM_ZnS},
+ {"fcvtx_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD},
+ {"fcvtxnt_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD},
+ {"flogb_z_p_z"_h, &Disassembler::DisassembleSVEFlogb},
+ {"fmaxnmp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair},
+ {"fmaxp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair},
+ {"fminnmp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair},
+ {"fminp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair},
+ {"fmlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH},
+ {"fmlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"fmlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH},
+ {"fmlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"fmlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH},
+ {"fmlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"fmlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH},
+ {"fmlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"histcnt_z_p_zz"_h, &Disassembler::Disassemble_ZdT_PgZ_ZnT_ZmT},
+ {"histseg_z_zz"_h, &Disassembler::Disassemble_ZdB_ZnB_ZmB},
+ {"ldnt1b_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1b_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1d_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1h_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1h_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1sb_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1sb_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1sh_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1sh_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1sw_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1w_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1w_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm},
+ {"match_p_p_zz"_h, &Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT},
+ {"mla_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm},
+ {"mla_z_zzzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm},
+ {"mla_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm},
+ {"mls_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm},
+ {"mls_z_zzzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm},
+ {"mls_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm},
+ {"mul_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"mul_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm},
+ {"mul_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm},
+ {"mul_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm},
+ {"nbsl_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary},
+ {"nmatch_p_p_zz"_h, &Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT},
+ {"pmul_z_zz"_h, &Disassembler::Disassemble_ZdB_ZnB_ZmB},
+ {"pmullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"pmullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"raddhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"raddhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"rax1_z_zz"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD},
+ {"rshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"rshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"rsubhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"rsubhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"saba_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT},
+ {"sabalb_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"sabalt_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"sabdlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"sabdlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"sadalp_z_p_z"_h, &Disassembler::Disassemble_ZdaT_PgM_ZnTb},
+ {"saddlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"saddlbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"saddlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"saddwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"saddwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"sbclb_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry},
+ {"sbclt_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry},
+ {"shadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"shrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"shrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"shsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"shsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sli_z_zzi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated},
+ {"sm4e_z_zz"_h, &Disassembler::Disassemble_ZdnS_ZdnS_ZmS},
+ {"sm4ekey_z_zz"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS},
+ {"smaxp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sminp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"smlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"smlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"smlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"smlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"smlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"smlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"smlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"smlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"smulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"smullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"smullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"smullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"smullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"smullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"splice_z_p_zz_con"_h, &Disassembler::Disassemble_ZdT_Pg_Zn1T_Zn2T},
+ {"sqabs_z_p_z"_h, &Disassembler::Disassemble_ZdT_PgM_ZnT},
+ {"sqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sqcadd_z_zz"_h, &Disassembler::DisassembleSVEComplexIntAddition},
+ {"sqdmlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"sqdmlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"sqdmlalbt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"sqdmlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"sqdmlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"sqdmlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"sqdmlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"sqdmlslbt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"sqdmlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"sqdmlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm},
+ {"sqdmulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"sqdmulh_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm},
+ {"sqdmulh_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm},
+ {"sqdmulh_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm},
+ {"sqdmullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"sqdmullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"sqdmullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"sqdmullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"sqdmullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"sqdmullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"sqneg_z_p_z"_h, &Disassembler::Disassemble_ZdT_PgM_ZnT},
+ {"sqrdcmlah_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT_const},
+ {"sqrdcmlah_z_zzzi_h"_h,
+ &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const},
+ {"sqrdcmlah_z_zzzi_s"_h,
+ &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const},
+ {"sqrdmlah_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT},
+ {"sqrdmlah_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm},
+ {"sqrdmlah_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm},
+ {"sqrdmlah_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm},
+ {"sqrdmlsh_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT},
+ {"sqrdmlsh_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm},
+ {"sqrdmlsh_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm},
+ {"sqrdmlsh_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm},
+ {"sqrdmulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"sqrdmulh_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm},
+ {"sqrdmulh_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm},
+ {"sqrdmulh_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm},
+ {"sqrshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sqrshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sqrshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqrshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqrshrunb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqrshrunt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqshl_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated},
+ {"sqshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sqshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sqshlu_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated},
+ {"sqshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqshrunb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqshrunt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"sqsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sqsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sqxtnb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb},
+ {"sqxtnt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb},
+ {"sqxtunb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb},
+ {"sqxtunt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb},
+ {"srhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"sri_z_zzi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated},
+ {"srshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"srshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"srshr_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated},
+ {"srsra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated},
+ {"sshllb_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm},
+ {"sshllt_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm},
+ {"ssra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated},
+ {"ssublb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"ssublbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"ssublt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"ssubltb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"ssubwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"ssubwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"stnt1b_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm},
+ {"stnt1b_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm},
+ {"stnt1d_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm},
+ {"stnt1h_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm},
+ {"stnt1h_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm},
+ {"stnt1w_z_p_ar_d_64_unscaled"_h,
+ &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm},
+ {"stnt1w_z_p_ar_s_x32_unscaled"_h,
+ &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm},
+ {"subhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"subhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh},
+ {"suqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"tbl_z_zz_2"_h, &Disassembler::Disassemble_ZdT_Zn1T_Zn2T_ZmT},
+ {"tbx_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"uaba_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT},
+ {"uabalb_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"uabalt_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"uabdlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"uabdlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"uadalp_z_p_z"_h, &Disassembler::Disassemble_ZdaT_PgM_ZnTb},
+ {"uaddlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"uaddlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"uaddwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"uaddwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"uhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uhsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uhsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"umaxp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uminp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"umlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"umlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"umlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"umlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"umlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"umlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"umlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb},
+ {"umlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"umulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT},
+ {"umullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"umullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"umullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"umullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm},
+ {"umullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm},
+ {"uqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uqrshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uqrshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uqrshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"uqrshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"uqshl_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated},
+ {"uqshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uqshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uqshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"uqshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm},
+ {"uqsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uqsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"uqxtnb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb},
+ {"uqxtnt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb},
+ {"urecpe_z_p_z"_h, &Disassembler::Disassemble_ZdS_PgM_ZnS},
+ {"urhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"urshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"urshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"urshr_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated},
+ {"ursqrte_z_p_z"_h, &Disassembler::Disassemble_ZdS_PgM_ZnS},
+ {"ursra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated},
+ {"ushllb_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm},
+ {"ushllt_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm},
+ {"usqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT},
+ {"usra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated},
+ {"usublb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"usublt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb},
+ {"usubwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"usubwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb},
+ {"whilege_p_p_rr"_h,
+ &Disassembler::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilegt_p_p_rr"_h,
+ &Disassembler::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilehi_p_p_rr"_h,
+ &Disassembler::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilehs_p_p_rr"_h,
+ &Disassembler::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilerw_p_rr"_h, &Disassembler::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilewr_p_rr"_h, &Disassembler::VisitSVEIntCompareScalarCountAndLimit},
+ {"xar_z_zzi"_h, &Disassembler::Disassemble_ZdnT_ZdnT_ZmT_const},
+ {"fmmla_z_zzz_s"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT},
+ {"fmmla_z_zzz_d"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT},
+ {"smmla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB},
+ {"ummla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB},
+ {"usmmla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB},
+ {"usdot_z_zzz_s"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB},
+ {"smmla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B},
+ {"ummla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B},
+ {"usmmla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B},
+ {"ld1row_z_p_bi_u32"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1row_z_p_br_contiguous"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"ld1rod_z_p_bi_u64"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1rod_z_p_br_contiguous"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"ld1rob_z_p_bi_u8"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1rob_z_p_br_contiguous"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"ld1roh_z_p_bi_u16"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1roh_z_p_br_contiguous"_h,
+ &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"usdot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex},
+ {"sudot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex},
+ {"usdot_asimdsame2_d"_h, &Disassembler::VisitNEON3SameExtra},
+ };
+ return &form_to_visitor;
+} // NOLINT(readability/fn_size)
+
Disassembler::Disassembler() {
buffer_size_ = 256;
buffer_ = reinterpret_cast<char *>(malloc(buffer_size_));
@@ -42,7 +684,6 @@ Disassembler::Disassembler() {
code_address_offset_ = 0;
}
-
Disassembler::Disassembler(char *text_buffer, int buffer_size) {
buffer_size_ = buffer_size;
buffer_ = text_buffer;
@@ -51,61 +692,46 @@ Disassembler::Disassembler(char *text_buffer, int buffer_size) {
code_address_offset_ = 0;
}
-
Disassembler::~Disassembler() {
if (own_buffer_) {
free(buffer_);
}
}
-
char *Disassembler::GetOutput() { return buffer_; }
-
void Disassembler::VisitAddSubImmediate(const Instruction *instr) {
bool rd_is_zr = RdIsZROrSP(instr);
bool stack_op =
(rd_is_zr || RnIsZROrSP(instr)) && (instr->GetImmAddSub() == 0) ? true
: false;
- const char *mnemonic = "";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Rds, 'Rns, 'IAddSub";
const char *form_cmp = "'Rns, 'IAddSub";
const char *form_mov = "'Rds, 'Rns";
- switch (instr->Mask(AddSubImmediateMask)) {
- case ADD_w_imm:
- case ADD_x_imm: {
- mnemonic = "add";
+ switch (form_hash_) {
+ case "add_32_addsub_imm"_h:
+ case "add_64_addsub_imm"_h:
if (stack_op) {
mnemonic = "mov";
form = form_mov;
}
break;
- }
- case ADDS_w_imm:
- case ADDS_x_imm: {
- mnemonic = "adds";
+ case "adds_32s_addsub_imm"_h:
+ case "adds_64s_addsub_imm"_h:
if (rd_is_zr) {
mnemonic = "cmn";
form = form_cmp;
}
break;
- }
- case SUB_w_imm:
- case SUB_x_imm:
- mnemonic = "sub";
- break;
- case SUBS_w_imm:
- case SUBS_x_imm: {
- mnemonic = "subs";
+ case "subs_32s_addsub_imm"_h:
+ case "subs_64s_addsub_imm"_h:
if (rd_is_zr) {
mnemonic = "cmp";
form = form_cmp;
}
break;
- }
- default:
- VIXL_UNREACHABLE();
}
Format(instr, mnemonic, form);
}
@@ -114,37 +740,28 @@ void Disassembler::VisitAddSubImmediate(const Instruction *instr) {
void Disassembler::VisitAddSubShifted(const Instruction *instr) {
bool rd_is_zr = RdIsZROrSP(instr);
bool rn_is_zr = RnIsZROrSP(instr);
- const char *mnemonic = "";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Rd, 'Rn, 'Rm'NDP";
const char *form_cmp = "'Rn, 'Rm'NDP";
const char *form_neg = "'Rd, 'Rm'NDP";
- switch (instr->Mask(AddSubShiftedMask)) {
- case ADD_w_shift:
- case ADD_x_shift:
- mnemonic = "add";
- break;
- case ADDS_w_shift:
- case ADDS_x_shift: {
- mnemonic = "adds";
+ switch (form_hash_) {
+ case "adds_32_addsub_shift"_h:
+ case "adds_64_addsub_shift"_h:
if (rd_is_zr) {
mnemonic = "cmn";
form = form_cmp;
}
break;
- }
- case SUB_w_shift:
- case SUB_x_shift: {
- mnemonic = "sub";
+ case "sub_32_addsub_shift"_h:
+ case "sub_64_addsub_shift"_h:
if (rn_is_zr) {
mnemonic = "neg";
form = form_neg;
}
break;
- }
- case SUBS_w_shift:
- case SUBS_x_shift: {
- mnemonic = "subs";
+ case "subs_32_addsub_shift"_h:
+ case "subs_64_addsub_shift"_h:
if (rd_is_zr) {
mnemonic = "cmp";
form = form_cmp;
@@ -152,10 +769,6 @@ void Disassembler::VisitAddSubShifted(const Instruction *instr) {
mnemonic = "negs";
form = form_neg;
}
- break;
- }
- default:
- VIXL_UNREACHABLE();
}
Format(instr, mnemonic, form);
}
@@ -245,40 +858,12 @@ void Disassembler::VisitAddSubWithCarry(const Instruction *instr) {
void Disassembler::VisitRotateRightIntoFlags(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(RotateRightIntoFlags)";
-
- switch (instr->Mask(RotateRightIntoFlagsMask)) {
- case RMIF:
- mnemonic = "rmif";
- form = "'Xn, 'IRr, 'INzcv";
- break;
- default:
- VIXL_UNREACHABLE();
- }
-
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Xn, 'IRr, 'INzcv");
}
void Disassembler::VisitEvaluateIntoFlags(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(EvaluateIntoFlags)";
-
- switch (instr->Mask(EvaluateIntoFlagsMask)) {
- case SETF8:
- mnemonic = "setf8";
- form = "'Wn";
- break;
- case SETF16:
- mnemonic = "setf16";
- form = "'Wn";
- break;
- default:
- VIXL_UNREACHABLE();
- }
-
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Wn");
}
@@ -361,60 +946,32 @@ bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) {
void Disassembler::VisitLogicalShifted(const Instruction *instr) {
bool rd_is_zr = RdIsZROrSP(instr);
bool rn_is_zr = RnIsZROrSP(instr);
- const char *mnemonic = "";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Rd, 'Rn, 'Rm'NLo";
- switch (instr->Mask(LogicalShiftedMask)) {
- case AND_w:
- case AND_x:
- mnemonic = "and";
- break;
- case BIC_w:
- case BIC_x:
- mnemonic = "bic";
- break;
- case EOR_w:
- case EOR_x:
- mnemonic = "eor";
- break;
- case EON_w:
- case EON_x:
- mnemonic = "eon";
- break;
- case BICS_w:
- case BICS_x:
- mnemonic = "bics";
- break;
- case ANDS_w:
- case ANDS_x: {
- mnemonic = "ands";
+ switch (form_hash_) {
+ case "ands_32_log_shift"_h:
+ case "ands_64_log_shift"_h:
if (rd_is_zr) {
mnemonic = "tst";
form = "'Rn, 'Rm'NLo";
}
break;
- }
- case ORR_w:
- case ORR_x: {
- mnemonic = "orr";
+ case "orr_32_log_shift"_h:
+ case "orr_64_log_shift"_h:
if (rn_is_zr && (instr->GetImmDPShift() == 0) &&
(instr->GetShiftDP() == LSL)) {
mnemonic = "mov";
form = "'Rd, 'Rm";
}
break;
- }
- case ORN_w:
- case ORN_x: {
- mnemonic = "orn";
+ case "orn_32_log_shift"_h:
+ case "orn_64_log_shift"_h:
if (rn_is_zr) {
mnemonic = "mvn";
form = "'Rd, 'Rm'NLo";
}
break;
- }
- default:
- VIXL_UNREACHABLE();
}
Format(instr, mnemonic, form);
@@ -422,42 +979,12 @@ void Disassembler::VisitLogicalShifted(const Instruction *instr) {
void Disassembler::VisitConditionalCompareRegister(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'Rn, 'Rm, 'INzcv, 'Cond";
-
- switch (instr->Mask(ConditionalCompareRegisterMask)) {
- case CCMN_w:
- case CCMN_x:
- mnemonic = "ccmn";
- break;
- case CCMP_w:
- case CCMP_x:
- mnemonic = "ccmp";
- break;
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Rn, 'Rm, 'INzcv, 'Cond");
}
void Disassembler::VisitConditionalCompareImmediate(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'Rn, 'IP, 'INzcv, 'Cond";
-
- switch (instr->Mask(ConditionalCompareImmediateMask)) {
- case CCMN_w_imm:
- case CCMN_x_imm:
- mnemonic = "ccmn";
- break;
- case CCMP_w_imm:
- case CCMP_x_imm:
- mnemonic = "ccmp";
- break;
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Rn, 'IP, 'INzcv, 'Cond");
}
@@ -531,6 +1058,16 @@ void Disassembler::VisitBitfield(const Instruction *instr) {
const char *form_bfx = "'Rd, 'Rn, 'IBr, 'IBs-r+1";
const char *form_lsl = "'Rd, 'Rn, 'IBZ-r";
+ if (instr->GetSixtyFourBits() != instr->GetBitN()) {
+ VisitUnallocated(instr);
+ return;
+ }
+
+ if ((instr->GetSixtyFourBits() == 0) && ((s > 31) || (r > 31))) {
+ VisitUnallocated(instr);
+ return;
+ }
+
switch (instr->Mask(BitfieldMask)) {
case SBFM_w:
case SBFM_x: {
@@ -638,217 +1175,102 @@ void Disassembler::VisitPCRelAddressing(const Instruction *instr) {
void Disassembler::VisitConditionalBranch(const Instruction *instr) {
- switch (instr->Mask(ConditionalBranchMask)) {
- case B_cond:
- Format(instr, "b.'CBrn", "'TImmCond");
- break;
- default:
- VIXL_UNREACHABLE();
- }
+ // We can't use the mnemonic directly here, as there's no space between it and
+ // the condition. Assert that we have the correct mnemonic, then use "b"
+ // explicitly for formatting the output.
+ VIXL_ASSERT(form_hash_ == "b_only_condbranch"_h);
+ Format(instr, "b.'CBrn", "'TImmCond");
}
void Disassembler::VisitUnconditionalBranchToRegister(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form;
+ const char *form = "'Xn";
- switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
- case BR:
- mnemonic = "br";
- form = "'Xn";
- break;
- case BLR:
- mnemonic = "blr";
- form = "'Xn";
- break;
- case RET: {
- mnemonic = "ret";
+ switch (form_hash_) {
+ case "ret_64r_branch_reg"_h:
if (instr->GetRn() == kLinkRegCode) {
- form = NULL;
- } else {
- form = "'Xn";
+ form = "";
}
break;
- }
- case BRAAZ:
- mnemonic = "braaz";
- form = "'Xn";
- break;
- case BRABZ:
- mnemonic = "brabz";
- form = "'Xn";
- break;
- case BLRAAZ:
- mnemonic = "blraaz";
- form = "'Xn";
- break;
- case BLRABZ:
- mnemonic = "blrabz";
- form = "'Xn";
- break;
- case RETAA:
- mnemonic = "retaa";
- form = NULL;
- break;
- case RETAB:
- mnemonic = "retab";
- form = NULL;
- break;
- case BRAA:
- mnemonic = "braa";
- form = "'Xn, 'Xds";
- break;
- case BRAB:
- mnemonic = "brab";
- form = "'Xn, 'Xds";
- break;
- case BLRAA:
- mnemonic = "blraa";
- form = "'Xn, 'Xds";
+ case "retaa_64e_branch_reg"_h:
+ case "retab_64e_branch_reg"_h:
+ form = "";
break;
- case BLRAB:
- mnemonic = "blrab";
+ case "braa_64p_branch_reg"_h:
+ case "brab_64p_branch_reg"_h:
+ case "blraa_64p_branch_reg"_h:
+ case "blrab_64p_branch_reg"_h:
form = "'Xn, 'Xds";
break;
- default:
- form = "(UnconditionalBranchToRegister)";
}
- Format(instr, mnemonic, form);
+
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitUnconditionalBranch(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'TImmUncn";
-
- switch (instr->Mask(UnconditionalBranchMask)) {
- case B:
- mnemonic = "b";
- break;
- case BL:
- mnemonic = "bl";
- break;
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'TImmUncn");
}
void Disassembler::VisitDataProcessing1Source(const Instruction *instr) {
- const char *mnemonic = "";
const char *form = "'Rd, 'Rn";
- switch (instr->Mask(DataProcessing1SourceMask)) {
-#define FORMAT(A, B) \
- case A##_w: \
- case A##_x: \
- mnemonic = B; \
- break;
- FORMAT(RBIT, "rbit");
- FORMAT(REV16, "rev16");
- FORMAT(REV, "rev");
- FORMAT(CLZ, "clz");
- FORMAT(CLS, "cls");
-#undef FORMAT
-
-#define PAUTH_VARIATIONS(V) \
- V(PACI, "paci") \
- V(PACD, "pacd") \
- V(AUTI, "auti") \
- V(AUTD, "autd")
-#define PAUTH_CASE(NAME, MN) \
- case NAME##A: \
- mnemonic = MN "a"; \
- form = "'Xd, 'Xns"; \
- break; \
- case NAME##ZA: \
- mnemonic = MN "za"; \
- form = "'Xd"; \
- break; \
- case NAME##B: \
- mnemonic = MN "b"; \
- form = "'Xd, 'Xns"; \
- break; \
- case NAME##ZB: \
- mnemonic = MN "zb"; \
- form = "'Xd"; \
- break;
-
- PAUTH_VARIATIONS(PAUTH_CASE)
-#undef PAUTH_CASE
-
- case XPACI:
- mnemonic = "xpaci";
- form = "'Xd";
- break;
- case XPACD:
- mnemonic = "xpacd";
+ switch (form_hash_) {
+ case "pacia_64p_dp_1src"_h:
+ case "pacda_64p_dp_1src"_h:
+ case "autia_64p_dp_1src"_h:
+ case "autda_64p_dp_1src"_h:
+ case "pacib_64p_dp_1src"_h:
+ case "pacdb_64p_dp_1src"_h:
+ case "autib_64p_dp_1src"_h:
+ case "autdb_64p_dp_1src"_h:
+ form = "'Xd, 'Xns";
+ break;
+ case "paciza_64z_dp_1src"_h:
+ case "pacdza_64z_dp_1src"_h:
+ case "autiza_64z_dp_1src"_h:
+ case "autdza_64z_dp_1src"_h:
+ case "pacizb_64z_dp_1src"_h:
+ case "pacdzb_64z_dp_1src"_h:
+ case "autizb_64z_dp_1src"_h:
+ case "autdzb_64z_dp_1src"_h:
+ case "xpacd_64z_dp_1src"_h:
+ case "xpaci_64z_dp_1src"_h:
form = "'Xd";
break;
- case REV32_x:
- mnemonic = "rev32";
- break;
- default:
- VIXL_UNREACHABLE();
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitDataProcessing2Source(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ std::string mnemonic = mnemonic_;
const char *form = "'Rd, 'Rn, 'Rm";
- const char *form_wwx = "'Wd, 'Wn, 'Xm";
- switch (instr->Mask(DataProcessing2SourceMask)) {
-#define FORMAT(A, B) \
- case A##_w: \
- case A##_x: \
- mnemonic = B; \
- break;
- FORMAT(UDIV, "udiv");
- FORMAT(SDIV, "sdiv");
- FORMAT(LSLV, "lsl");
- FORMAT(LSRV, "lsr");
- FORMAT(ASRV, "asr");
- FORMAT(RORV, "ror");
-#undef FORMAT
- case PACGA:
- mnemonic = "pacga";
+ switch (form_hash_) {
+ case "asrv_32_dp_2src"_h:
+ case "asrv_64_dp_2src"_h:
+ case "lslv_32_dp_2src"_h:
+ case "lslv_64_dp_2src"_h:
+ case "lsrv_32_dp_2src"_h:
+ case "lsrv_64_dp_2src"_h:
+ case "rorv_32_dp_2src"_h:
+ case "rorv_64_dp_2src"_h:
+ // Drop the last 'v' character.
+ VIXL_ASSERT(mnemonic[3] == 'v');
+ mnemonic.pop_back();
+ break;
+ case "pacga_64p_dp_2src"_h:
form = "'Xd, 'Xn, 'Xms";
break;
- case CRC32B:
- mnemonic = "crc32b";
- break;
- case CRC32H:
- mnemonic = "crc32h";
- break;
- case CRC32W:
- mnemonic = "crc32w";
- break;
- case CRC32X:
- mnemonic = "crc32x";
- form = form_wwx;
- break;
- case CRC32CB:
- mnemonic = "crc32cb";
- break;
- case CRC32CH:
- mnemonic = "crc32ch";
+ case "crc32x_64c_dp_2src"_h:
+ case "crc32cx_64c_dp_2src"_h:
+ form = "'Wd, 'Wn, 'Xm";
break;
- case CRC32CW:
- mnemonic = "crc32cw";
- break;
- case CRC32CX:
- mnemonic = "crc32cx";
- form = form_wwx;
- break;
- default:
- form = "(DataProcessing2Source)";
}
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic.c_str(), form);
}
@@ -932,44 +1354,16 @@ void Disassembler::VisitDataProcessing3Source(const Instruction *instr) {
void Disassembler::VisitCompareBranch(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'Rt, 'TImmCmpa";
-
- switch (instr->Mask(CompareBranchMask)) {
- case CBZ_w:
- case CBZ_x:
- mnemonic = "cbz";
- break;
- case CBNZ_w:
- case CBNZ_x:
- mnemonic = "cbnz";
- break;
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Rt, 'TImmCmpa");
}
void Disassembler::VisitTestBranch(const Instruction *instr) {
- const char *mnemonic = "";
// If the top bit of the immediate is clear, the tested register is
// disassembled as Wt, otherwise Xt. As the top bit of the immediate is
// encoded in bit 31 of the instruction, we can reuse the Rt form, which
// uses bit 31 (normally "sf") to choose the register size.
- const char *form = "'Rt, 'It, 'TImmTest";
-
- switch (instr->Mask(TestBranchMask)) {
- case TBZ:
- mnemonic = "tbz";
- break;
- case TBNZ:
- mnemonic = "tbnz";
- break;
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Rt, 'It, 'TImmTest");
}
@@ -1014,138 +1408,94 @@ void Disassembler::VisitMoveWideImmediate(const Instruction *instr) {
}
-#define LOAD_STORE_LIST(V) \
- V(STRB_w, "strb", "'Wt") \
- V(STRH_w, "strh", "'Wt") \
- V(STR_w, "str", "'Wt") \
- V(STR_x, "str", "'Xt") \
- V(LDRB_w, "ldrb", "'Wt") \
- V(LDRH_w, "ldrh", "'Wt") \
- V(LDR_w, "ldr", "'Wt") \
- V(LDR_x, "ldr", "'Xt") \
- V(LDRSB_x, "ldrsb", "'Xt") \
- V(LDRSH_x, "ldrsh", "'Xt") \
- V(LDRSW_x, "ldrsw", "'Xt") \
- V(LDRSB_w, "ldrsb", "'Wt") \
- V(LDRSH_w, "ldrsh", "'Wt") \
- V(STR_b, "str", "'Bt") \
- V(STR_h, "str", "'Ht") \
- V(STR_s, "str", "'St") \
- V(STR_d, "str", "'Dt") \
- V(LDR_b, "ldr", "'Bt") \
- V(LDR_h, "ldr", "'Ht") \
- V(LDR_s, "ldr", "'St") \
- V(LDR_d, "ldr", "'Dt") \
- V(STR_q, "str", "'Qt") \
- V(LDR_q, "ldr", "'Qt")
+#define LOAD_STORE_LIST(V) \
+ V(STRB_w, "'Wt") \
+ V(STRH_w, "'Wt") \
+ V(STR_w, "'Wt") \
+ V(STR_x, "'Xt") \
+ V(LDRB_w, "'Wt") \
+ V(LDRH_w, "'Wt") \
+ V(LDR_w, "'Wt") \
+ V(LDR_x, "'Xt") \
+ V(LDRSB_x, "'Xt") \
+ V(LDRSH_x, "'Xt") \
+ V(LDRSW_x, "'Xt") \
+ V(LDRSB_w, "'Wt") \
+ V(LDRSH_w, "'Wt") \
+ V(STR_b, "'Bt") \
+ V(STR_h, "'Ht") \
+ V(STR_s, "'St") \
+ V(STR_d, "'Dt") \
+ V(LDR_b, "'Bt") \
+ V(LDR_h, "'Ht") \
+ V(LDR_s, "'St") \
+ V(LDR_d, "'Dt") \
+ V(STR_q, "'Qt") \
+ V(LDR_q, "'Qt")
void Disassembler::VisitLoadStorePreIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(LoadStorePreIndex)";
+ const char *suffix = ", ['Xns'ILSi]!";
switch (instr->Mask(LoadStorePreIndexMask)) {
-#define LS_PREINDEX(A, B, C) \
- case A##_pre: \
- mnemonic = B; \
- form = C ", ['Xns'ILSi]!"; \
+#define LS_PREINDEX(A, B) \
+ case A##_pre: \
+ form = B; \
break;
LOAD_STORE_LIST(LS_PREINDEX)
#undef LS_PREINDEX
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitLoadStorePostIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(LoadStorePostIndex)";
+ const char *suffix = ", ['Xns]'ILSi";
switch (instr->Mask(LoadStorePostIndexMask)) {
-#define LS_POSTINDEX(A, B, C) \
- case A##_post: \
- mnemonic = B; \
- form = C ", ['Xns]'ILSi"; \
+#define LS_POSTINDEX(A, B) \
+ case A##_post: \
+ form = B; \
break;
LOAD_STORE_LIST(LS_POSTINDEX)
#undef LS_POSTINDEX
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(LoadStoreUnsignedOffset)";
+ const char *suffix = ", ['Xns'ILU]";
switch (instr->Mask(LoadStoreUnsignedOffsetMask)) {
-#define LS_UNSIGNEDOFFSET(A, B, C) \
- case A##_unsigned: \
- mnemonic = B; \
- form = C ", ['Xns'ILU]"; \
+#define LS_UNSIGNEDOFFSET(A, B) \
+ case A##_unsigned: \
+ form = B; \
break;
LOAD_STORE_LIST(LS_UNSIGNEDOFFSET)
#undef LS_UNSIGNEDOFFSET
case PRFM_unsigned:
- mnemonic = "prfm";
- form = "'prefOp, ['Xns'ILU]";
+ form = "'prefOp";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitLoadStoreRCpcUnscaledOffset(const Instruction *instr) {
- const char *mnemonic;
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Wt, ['Xns'ILS]";
const char *form_x = "'Xt, ['Xns'ILS]";
- switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
- case STLURB:
- mnemonic = "stlurb";
- break;
- case LDAPURB:
- mnemonic = "ldapurb";
- break;
- case LDAPURSB_w:
- mnemonic = "ldapursb";
- break;
- case LDAPURSB_x:
- mnemonic = "ldapursb";
- form = form_x;
- break;
- case STLURH:
- mnemonic = "stlurh";
- break;
- case LDAPURH:
- mnemonic = "ldapurh";
- break;
- case LDAPURSH_w:
- mnemonic = "ldapursh";
- break;
- case LDAPURSH_x:
- mnemonic = "ldapursh";
- form = form_x;
- break;
- case STLUR_w:
- mnemonic = "stlur";
- break;
- case LDAPUR_w:
- mnemonic = "ldapur";
- break;
- case LDAPURSW:
- mnemonic = "ldapursw";
+ switch (form_hash_) {
+ case "ldapursb_64_ldapstl_unscaled"_h:
+ case "ldapursh_64_ldapstl_unscaled"_h:
+ case "ldapursw_64_ldapstl_unscaled"_h:
+ case "ldapur_64_ldapstl_unscaled"_h:
+ case "stlur_64_ldapstl_unscaled"_h:
form = form_x;
break;
- case STLUR_x:
- mnemonic = "stlur";
- form = form_x;
- break;
- case LDAPUR_x:
- mnemonic = "ldapur";
- form = form_x;
- break;
- default:
- mnemonic = "unimplemented";
- form = "(LoadStoreRCpcUnscaledOffset)";
}
Format(instr, mnemonic, form);
@@ -1153,365 +1503,233 @@ void Disassembler::VisitLoadStoreRCpcUnscaledOffset(const Instruction *instr) {
void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(LoadStoreRegisterOffset)";
+ const char *suffix = ", ['Xns, 'Offsetreg]";
switch (instr->Mask(LoadStoreRegisterOffsetMask)) {
-#define LS_REGISTEROFFSET(A, B, C) \
- case A##_reg: \
- mnemonic = B; \
- form = C ", ['Xns, 'Offsetreg]"; \
+#define LS_REGISTEROFFSET(A, B) \
+ case A##_reg: \
+ form = B; \
break;
LOAD_STORE_LIST(LS_REGISTEROFFSET)
#undef LS_REGISTEROFFSET
case PRFM_reg:
- mnemonic = "prfm";
- form = "'prefOp, ['Xns, 'Offsetreg]";
+ form = "'prefOp";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Wt, ['Xns'ILS]";
- const char *form_x = "'Xt, ['Xns'ILS]";
- const char *form_b = "'Bt, ['Xns'ILS]";
- const char *form_h = "'Ht, ['Xns'ILS]";
- const char *form_s = "'St, ['Xns'ILS]";
- const char *form_d = "'Dt, ['Xns'ILS]";
- const char *form_q = "'Qt, ['Xns'ILS]";
- const char *form_prefetch = "'prefOp, ['Xns'ILS]";
+ const char *form = "'Wt";
+ const char *suffix = ", ['Xns'ILS]";
- switch (instr->Mask(LoadStoreUnscaledOffsetMask)) {
- case STURB_w:
- mnemonic = "sturb";
- break;
- case STURH_w:
- mnemonic = "sturh";
- break;
- case STUR_w:
- mnemonic = "stur";
- break;
- case STUR_x:
- mnemonic = "stur";
- form = form_x;
- break;
- case STUR_b:
- mnemonic = "stur";
- form = form_b;
- break;
- case STUR_h:
- mnemonic = "stur";
- form = form_h;
- break;
- case STUR_s:
- mnemonic = "stur";
- form = form_s;
- break;
- case STUR_d:
- mnemonic = "stur";
- form = form_d;
- break;
- case STUR_q:
- mnemonic = "stur";
- form = form_q;
- break;
- case LDURB_w:
- mnemonic = "ldurb";
- break;
- case LDURH_w:
- mnemonic = "ldurh";
- break;
- case LDUR_w:
- mnemonic = "ldur";
- break;
- case LDUR_x:
- mnemonic = "ldur";
- form = form_x;
- break;
- case LDUR_b:
- mnemonic = "ldur";
- form = form_b;
- break;
- case LDUR_h:
- mnemonic = "ldur";
- form = form_h;
- break;
- case LDUR_s:
- mnemonic = "ldur";
- form = form_s;
+ switch (form_hash_) {
+ case "ldur_64_ldst_unscaled"_h:
+ case "ldursb_64_ldst_unscaled"_h:
+ case "ldursh_64_ldst_unscaled"_h:
+ case "ldursw_64_ldst_unscaled"_h:
+ case "stur_64_ldst_unscaled"_h:
+ form = "'Xt";
break;
- case LDUR_d:
- mnemonic = "ldur";
- form = form_d;
+ case "ldur_b_ldst_unscaled"_h:
+ case "stur_b_ldst_unscaled"_h:
+ form = "'Bt";
break;
- case LDUR_q:
- mnemonic = "ldur";
- form = form_q;
+ case "ldur_h_ldst_unscaled"_h:
+ case "stur_h_ldst_unscaled"_h:
+ form = "'Ht";
break;
- case LDURSB_x:
- form = form_x;
- VIXL_FALLTHROUGH();
- case LDURSB_w:
- mnemonic = "ldursb";
+ case "ldur_s_ldst_unscaled"_h:
+ case "stur_s_ldst_unscaled"_h:
+ form = "'St";
break;
- case LDURSH_x:
- form = form_x;
- VIXL_FALLTHROUGH();
- case LDURSH_w:
- mnemonic = "ldursh";
+ case "ldur_d_ldst_unscaled"_h:
+ case "stur_d_ldst_unscaled"_h:
+ form = "'Dt";
break;
- case LDURSW_x:
- mnemonic = "ldursw";
- form = form_x;
+ case "ldur_q_ldst_unscaled"_h:
+ case "stur_q_ldst_unscaled"_h:
+ form = "'Qt";
break;
- case PRFUM:
- mnemonic = "prfum";
- form = form_prefetch;
+ case "prfum_p_ldst_unscaled"_h:
+ form = "'prefOp";
break;
- default:
- form = "(LoadStoreUnscaledOffset)";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitLoadLiteral(const Instruction *instr) {
- const char *mnemonic = "ldr";
- const char *form = "(LoadLiteral)";
+ const char *form = "'Wt";
+ const char *suffix = ", 'ILLiteral 'LValue";
- switch (instr->Mask(LoadLiteralMask)) {
- case LDR_w_lit:
- form = "'Wt, 'ILLiteral 'LValue";
- break;
- case LDR_x_lit:
- form = "'Xt, 'ILLiteral 'LValue";
+ switch (form_hash_) {
+ case "ldr_64_loadlit"_h:
+ case "ldrsw_64_loadlit"_h:
+ form = "'Xt";
break;
- case LDR_s_lit:
- form = "'St, 'ILLiteral 'LValue";
- break;
- case LDR_d_lit:
- form = "'Dt, 'ILLiteral 'LValue";
+ case "ldr_s_loadlit"_h:
+ form = "'St";
break;
- case LDR_q_lit:
- form = "'Qt, 'ILLiteral 'LValue";
+ case "ldr_d_loadlit"_h:
+ form = "'Dt";
break;
- case LDRSW_x_lit: {
- mnemonic = "ldrsw";
- form = "'Xt, 'ILLiteral 'LValue";
+ case "ldr_q_loadlit"_h:
+ form = "'Qt";
break;
- }
- case PRFM_lit: {
- mnemonic = "prfm";
- form = "'prefOp, 'ILLiteral 'LValue";
+ case "prfm_p_loadlit"_h:
+ form = "'prefOp";
break;
- }
- default:
- mnemonic = "unimplemented";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
-#define LOAD_STORE_PAIR_LIST(V) \
- V(STP_w, "stp", "'Wt, 'Wt2", "2") \
- V(LDP_w, "ldp", "'Wt, 'Wt2", "2") \
- V(LDPSW_x, "ldpsw", "'Xt, 'Xt2", "2") \
- V(STP_x, "stp", "'Xt, 'Xt2", "3") \
- V(LDP_x, "ldp", "'Xt, 'Xt2", "3") \
- V(STP_s, "stp", "'St, 'St2", "2") \
- V(LDP_s, "ldp", "'St, 'St2", "2") \
- V(STP_d, "stp", "'Dt, 'Dt2", "3") \
- V(LDP_d, "ldp", "'Dt, 'Dt2", "3") \
- V(LDP_q, "ldp", "'Qt, 'Qt2", "4") \
- V(STP_q, "stp", "'Qt, 'Qt2", "4")
+#define LOAD_STORE_PAIR_LIST(V) \
+ V(STP_w, "'Wt, 'Wt2", "2") \
+ V(LDP_w, "'Wt, 'Wt2", "2") \
+ V(LDPSW_x, "'Xt, 'Xt2", "2") \
+ V(STP_x, "'Xt, 'Xt2", "3") \
+ V(LDP_x, "'Xt, 'Xt2", "3") \
+ V(STP_s, "'St, 'St2", "2") \
+ V(LDP_s, "'St, 'St2", "2") \
+ V(STP_d, "'Dt, 'Dt2", "3") \
+ V(LDP_d, "'Dt, 'Dt2", "3") \
+ V(LDP_q, "'Qt, 'Qt2", "4") \
+ V(STP_q, "'Qt, 'Qt2", "4")
void Disassembler::VisitLoadStorePairPostIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(LoadStorePairPostIndex)";
switch (instr->Mask(LoadStorePairPostIndexMask)) {
-#define LSP_POSTINDEX(A, B, C, D) \
+#define LSP_POSTINDEX(A, B, C) \
case A##_post: \
- mnemonic = B; \
- form = C ", ['Xns]'ILP" D "i"; \
+ form = B ", ['Xns]'ILP" C "i"; \
break;
LOAD_STORE_PAIR_LIST(LSP_POSTINDEX)
#undef LSP_POSTINDEX
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitLoadStorePairPreIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(LoadStorePairPreIndex)";
switch (instr->Mask(LoadStorePairPreIndexMask)) {
-#define LSP_PREINDEX(A, B, C, D) \
+#define LSP_PREINDEX(A, B, C) \
case A##_pre: \
- mnemonic = B; \
- form = C ", ['Xns'ILP" D "i]!"; \
+ form = B ", ['Xns'ILP" C "i]!"; \
break;
LOAD_STORE_PAIR_LIST(LSP_PREINDEX)
#undef LSP_PREINDEX
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitLoadStorePairOffset(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(LoadStorePairOffset)";
switch (instr->Mask(LoadStorePairOffsetMask)) {
-#define LSP_OFFSET(A, B, C, D) \
+#define LSP_OFFSET(A, B, C) \
case A##_off: \
- mnemonic = B; \
- form = C ", ['Xns'ILP" D "]"; \
+ form = B ", ['Xns'ILP" C "]"; \
break;
LOAD_STORE_PAIR_LIST(LSP_OFFSET)
#undef LSP_OFFSET
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitLoadStorePairNonTemporal(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form;
+ const char *form = "'Wt, 'Wt2, ['Xns'ILP2]";
- switch (instr->Mask(LoadStorePairNonTemporalMask)) {
- case STNP_w:
- mnemonic = "stnp";
- form = "'Wt, 'Wt2, ['Xns'ILP2]";
- break;
- case LDNP_w:
- mnemonic = "ldnp";
- form = "'Wt, 'Wt2, ['Xns'ILP2]";
- break;
- case STNP_x:
- mnemonic = "stnp";
+ switch (form_hash_) {
+ case "ldnp_64_ldstnapair_offs"_h:
+ case "stnp_64_ldstnapair_offs"_h:
form = "'Xt, 'Xt2, ['Xns'ILP3]";
break;
- case LDNP_x:
- mnemonic = "ldnp";
- form = "'Xt, 'Xt2, ['Xns'ILP3]";
- break;
- case STNP_s:
- mnemonic = "stnp";
- form = "'St, 'St2, ['Xns'ILP2]";
- break;
- case LDNP_s:
- mnemonic = "ldnp";
+ case "ldnp_s_ldstnapair_offs"_h:
+ case "stnp_s_ldstnapair_offs"_h:
form = "'St, 'St2, ['Xns'ILP2]";
break;
- case STNP_d:
- mnemonic = "stnp";
- form = "'Dt, 'Dt2, ['Xns'ILP3]";
- break;
- case LDNP_d:
- mnemonic = "ldnp";
+ case "ldnp_d_ldstnapair_offs"_h:
+ case "stnp_d_ldstnapair_offs"_h:
form = "'Dt, 'Dt2, ['Xns'ILP3]";
break;
- case STNP_q:
- mnemonic = "stnp";
+ case "ldnp_q_ldstnapair_offs"_h:
+ case "stnp_q_ldstnapair_offs"_h:
form = "'Qt, 'Qt2, ['Xns'ILP4]";
break;
- case LDNP_q:
- mnemonic = "ldnp";
- form = "'Qt, 'Qt2, ['Xns'ILP4]";
- break;
- default:
- form = "(LoadStorePairNonTemporal)";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
// clang-format off
-#define LOAD_STORE_EXCLUSIVE_LIST(V) \
- V(STXRB_w, "stxrb", "'Ws, 'Wt") \
- V(STXRH_w, "stxrh", "'Ws, 'Wt") \
- V(STXR_w, "stxr", "'Ws, 'Wt") \
- V(STXR_x, "stxr", "'Ws, 'Xt") \
- V(LDXRB_w, "ldxrb", "'Wt") \
- V(LDXRH_w, "ldxrh", "'Wt") \
- V(LDXR_w, "ldxr", "'Wt") \
- V(LDXR_x, "ldxr", "'Xt") \
- V(STXP_w, "stxp", "'Ws, 'Wt, 'Wt2") \
- V(STXP_x, "stxp", "'Ws, 'Xt, 'Xt2") \
- V(LDXP_w, "ldxp", "'Wt, 'Wt2") \
- V(LDXP_x, "ldxp", "'Xt, 'Xt2") \
- V(STLXRB_w, "stlxrb", "'Ws, 'Wt") \
- V(STLXRH_w, "stlxrh", "'Ws, 'Wt") \
- V(STLXR_w, "stlxr", "'Ws, 'Wt") \
- V(STLXR_x, "stlxr", "'Ws, 'Xt") \
- V(LDAXRB_w, "ldaxrb", "'Wt") \
- V(LDAXRH_w, "ldaxrh", "'Wt") \
- V(LDAXR_w, "ldaxr", "'Wt") \
- V(LDAXR_x, "ldaxr", "'Xt") \
- V(STLXP_w, "stlxp", "'Ws, 'Wt, 'Wt2") \
- V(STLXP_x, "stlxp", "'Ws, 'Xt, 'Xt2") \
- V(LDAXP_w, "ldaxp", "'Wt, 'Wt2") \
- V(LDAXP_x, "ldaxp", "'Xt, 'Xt2") \
- V(STLRB_w, "stlrb", "'Wt") \
- V(STLRH_w, "stlrh", "'Wt") \
- V(STLR_w, "stlr", "'Wt") \
- V(STLR_x, "stlr", "'Xt") \
- V(LDARB_w, "ldarb", "'Wt") \
- V(LDARH_w, "ldarh", "'Wt") \
- V(LDAR_w, "ldar", "'Wt") \
- V(LDAR_x, "ldar", "'Xt") \
- V(STLLRB, "stllrb", "'Wt") \
- V(STLLRH, "stllrh", "'Wt") \
- V(STLLR_w, "stllr", "'Wt") \
- V(STLLR_x, "stllr", "'Xt") \
- V(LDLARB, "ldlarb", "'Wt") \
- V(LDLARH, "ldlarh", "'Wt") \
- V(LDLAR_w, "ldlar", "'Wt") \
- V(LDLAR_x, "ldlar", "'Xt") \
- V(CAS_w, "cas", "'Ws, 'Wt") \
- V(CAS_x, "cas", "'Xs, 'Xt") \
- V(CASA_w, "casa", "'Ws, 'Wt") \
- V(CASA_x, "casa", "'Xs, 'Xt") \
- V(CASL_w, "casl", "'Ws, 'Wt") \
- V(CASL_x, "casl", "'Xs, 'Xt") \
- V(CASAL_w, "casal", "'Ws, 'Wt") \
- V(CASAL_x, "casal", "'Xs, 'Xt") \
- V(CASB, "casb", "'Ws, 'Wt") \
- V(CASAB, "casab", "'Ws, 'Wt") \
- V(CASLB, "caslb", "'Ws, 'Wt") \
- V(CASALB, "casalb", "'Ws, 'Wt") \
- V(CASH, "cash", "'Ws, 'Wt") \
- V(CASAH, "casah", "'Ws, 'Wt") \
- V(CASLH, "caslh", "'Ws, 'Wt") \
- V(CASALH, "casalh", "'Ws, 'Wt") \
- V(CASP_w, "casp", "'Ws, 'Ws+, 'Wt, 'Wt+") \
- V(CASP_x, "casp", "'Xs, 'Xs+, 'Xt, 'Xt+") \
- V(CASPA_w, "caspa", "'Ws, 'Ws+, 'Wt, 'Wt+") \
- V(CASPA_x, "caspa", "'Xs, 'Xs+, 'Xt, 'Xt+") \
- V(CASPL_w, "caspl", "'Ws, 'Ws+, 'Wt, 'Wt+") \
- V(CASPL_x, "caspl", "'Xs, 'Xs+, 'Xt, 'Xt+") \
- V(CASPAL_w, "caspal", "'Ws, 'Ws+, 'Wt, 'Wt+") \
- V(CASPAL_x, "caspal", "'Xs, 'Xs+, 'Xt, 'Xt+")
+#define LOAD_STORE_EXCLUSIVE_LIST(V) \
+ V(STXRB_w, "'Ws, 'Wt") \
+ V(STXRH_w, "'Ws, 'Wt") \
+ V(STXR_w, "'Ws, 'Wt") \
+ V(STXR_x, "'Ws, 'Xt") \
+ V(LDXR_x, "'Xt") \
+ V(STXP_w, "'Ws, 'Wt, 'Wt2") \
+ V(STXP_x, "'Ws, 'Xt, 'Xt2") \
+ V(LDXP_w, "'Wt, 'Wt2") \
+ V(LDXP_x, "'Xt, 'Xt2") \
+ V(STLXRB_w, "'Ws, 'Wt") \
+ V(STLXRH_w, "'Ws, 'Wt") \
+ V(STLXR_w, "'Ws, 'Wt") \
+ V(STLXR_x, "'Ws, 'Xt") \
+ V(LDAXR_x, "'Xt") \
+ V(STLXP_w, "'Ws, 'Wt, 'Wt2") \
+ V(STLXP_x, "'Ws, 'Xt, 'Xt2") \
+ V(LDAXP_w, "'Wt, 'Wt2") \
+ V(LDAXP_x, "'Xt, 'Xt2") \
+ V(STLR_x, "'Xt") \
+ V(LDAR_x, "'Xt") \
+ V(STLLR_x, "'Xt") \
+ V(LDLAR_x, "'Xt") \
+ V(CAS_w, "'Ws, 'Wt") \
+ V(CAS_x, "'Xs, 'Xt") \
+ V(CASA_w, "'Ws, 'Wt") \
+ V(CASA_x, "'Xs, 'Xt") \
+ V(CASL_w, "'Ws, 'Wt") \
+ V(CASL_x, "'Xs, 'Xt") \
+ V(CASAL_w, "'Ws, 'Wt") \
+ V(CASAL_x, "'Xs, 'Xt") \
+ V(CASB, "'Ws, 'Wt") \
+ V(CASAB, "'Ws, 'Wt") \
+ V(CASLB, "'Ws, 'Wt") \
+ V(CASALB, "'Ws, 'Wt") \
+ V(CASH, "'Ws, 'Wt") \
+ V(CASAH, "'Ws, 'Wt") \
+ V(CASLH, "'Ws, 'Wt") \
+ V(CASALH, "'Ws, 'Wt") \
+ V(CASP_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASP_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \
+ V(CASPA_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASPA_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \
+ V(CASPL_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASPL_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \
+ V(CASPAL_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \
+ V(CASPAL_x, "'Xs, 'Xs+, 'Xt, 'Xt+")
// clang-format on
void Disassembler::VisitLoadStoreExclusive(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form;
+ const char *form = "'Wt";
+ const char *suffix = ", ['Xns]";
switch (instr->Mask(LoadStoreExclusiveMask)) {
-#define LSX(A, B, C) \
- case A: \
- mnemonic = B; \
- form = C ", ['Xns]"; \
+#define LSX(A, B) \
+ case A: \
+ form = B; \
break;
LOAD_STORE_EXCLUSIVE_LIST(LSX)
#undef LSX
- default:
- form = "(LoadStoreExclusive)";
}
switch (instr->Mask(LoadStoreExclusiveMask)) {
@@ -1524,767 +1742,264 @@ void Disassembler::VisitLoadStoreExclusive(const Instruction *instr) {
case CASPAL_w:
case CASPAL_x:
if ((instr->GetRs() % 2 == 1) || (instr->GetRt() % 2 == 1)) {
- mnemonic = "unallocated";
- form = "(LoadStoreExclusive)";
+ VisitUnallocated(instr);
+ return;
}
break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitLoadStorePAC(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(LoadStorePAC)";
-
- switch (instr->Mask(LoadStorePACMask)) {
- case LDRAA:
- mnemonic = "ldraa";
- form = "'Xt, ['Xns'ILA]";
- break;
- case LDRAB:
- mnemonic = "ldrab";
- form = "'Xt, ['Xns'ILA]";
- break;
- case LDRAA_pre:
- mnemonic = "ldraa";
- form = "'Xt, ['Xns'ILA]!";
- break;
- case LDRAB_pre:
- mnemonic = "ldrab";
- form = "'Xt, ['Xns'ILA]!";
+ const char *form = "'Xt, ['Xns'ILA]";
+ const char *suffix = "";
+ switch (form_hash_) {
+ case "ldraa_64w_ldst_pac"_h:
+ case "ldrab_64w_ldst_pac"_h:
+ suffix = "!";
break;
}
-
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
-#define ATOMIC_MEMORY_SIMPLE_LIST(V) \
- V(LDADD, "add") \
- V(LDCLR, "clr") \
- V(LDEOR, "eor") \
- V(LDSET, "set") \
- V(LDSMAX, "smax") \
- V(LDSMIN, "smin") \
- V(LDUMAX, "umax") \
- V(LDUMIN, "umin")
-
void Disassembler::VisitAtomicMemory(const Instruction *instr) {
- const int kMaxAtomicOpMnemonicLength = 16;
- const char *mnemonic;
- const char *form = "'Ws, 'Wt, ['Xns]";
-
- switch (instr->Mask(AtomicMemoryMask)) {
-#define AMS(A, MN) \
- case A##B: \
- mnemonic = MN "b"; \
- break; \
- case A##AB: \
- mnemonic = MN "ab"; \
- break; \
- case A##LB: \
- mnemonic = MN "lb"; \
- break; \
- case A##ALB: \
- mnemonic = MN "alb"; \
- break; \
- case A##H: \
- mnemonic = MN "h"; \
- break; \
- case A##AH: \
- mnemonic = MN "ah"; \
- break; \
- case A##LH: \
- mnemonic = MN "lh"; \
- break; \
- case A##ALH: \
- mnemonic = MN "alh"; \
- break; \
- case A##_w: \
- mnemonic = MN; \
- break; \
- case A##A_w: \
- mnemonic = MN "a"; \
- break; \
- case A##L_w: \
- mnemonic = MN "l"; \
- break; \
- case A##AL_w: \
- mnemonic = MN "al"; \
- break; \
- case A##_x: \
- mnemonic = MN; \
- form = "'Xs, 'Xt, ['Xns]"; \
- break; \
- case A##A_x: \
- mnemonic = MN "a"; \
- form = "'Xs, 'Xt, ['Xns]"; \
- break; \
- case A##L_x: \
- mnemonic = MN "l"; \
- form = "'Xs, 'Xt, ['Xns]"; \
- break; \
- case A##AL_x: \
- mnemonic = MN "al"; \
- form = "'Xs, 'Xt, ['Xns]"; \
- break;
- ATOMIC_MEMORY_SIMPLE_LIST(AMS)
+ bool is_x = (instr->ExtractBits(31, 30) == 3);
+ const char *form = is_x ? "'Xs, 'Xt" : "'Ws, 'Wt";
+ const char *suffix = ", ['Xns]";
- // SWP has the same semantics as ldadd etc but without the store aliases.
- AMS(SWP, "swp")
-#undef AMS
+ std::string mnemonic = mnemonic_;
- case LDAPRB:
- mnemonic = "ldaprb";
- form = "'Wt, ['Xns]";
- break;
- case LDAPRH:
- mnemonic = "ldaprh";
- form = "'Wt, ['Xns]";
+ switch (form_hash_) {
+ case "ldaprb_32l_memop"_h:
+ case "ldaprh_32l_memop"_h:
+ case "ldapr_32l_memop"_h:
+ form = "'Wt";
break;
- case LDAPR_w:
- mnemonic = "ldapr";
- form = "'Wt, ['Xns]";
- break;
- case LDAPR_x:
- mnemonic = "ldapr";
- form = "'Xt, ['Xns]";
+ case "ldapr_64l_memop"_h:
+ form = "'Xt";
break;
default:
- mnemonic = "unimplemented";
- form = "(AtomicMemory)";
- }
-
- const char *prefix = "";
- switch (instr->Mask(AtomicMemoryMask)) {
-#define AMS(A, MN) \
- case A##AB: \
- case A##ALB: \
- case A##AH: \
- case A##ALH: \
- case A##A_w: \
- case A##AL_w: \
- case A##A_x: \
- case A##AL_x: \
- prefix = "ld"; \
- break; \
- case A##B: \
- case A##LB: \
- case A##H: \
- case A##LH: \
- case A##_w: \
- case A##L_w: { \
- prefix = "ld"; \
- unsigned rt = instr->GetRt(); \
- if (Register(rt, 32).IsZero()) { \
- prefix = "st"; \
- form = "'Ws, ['Xns]"; \
- } \
- break; \
- } \
- case A##_x: \
- case A##L_x: { \
- prefix = "ld"; \
- unsigned rt = instr->GetRt(); \
- if (Register(rt, 64).IsZero()) { \
- prefix = "st"; \
- form = "'Xs, ['Xns]"; \
- } \
- break; \
- }
- ATOMIC_MEMORY_SIMPLE_LIST(AMS)
-#undef AMS
- }
-
- char buffer[kMaxAtomicOpMnemonicLength];
- if (strlen(prefix) > 0) {
- snprintf(buffer, kMaxAtomicOpMnemonicLength, "%s%s", prefix, mnemonic);
- mnemonic = buffer;
+ // Zero register implies a store instruction.
+ if (instr->GetRt() == kZeroRegCode) {
+ mnemonic.replace(0, 2, "st");
+ form = is_x ? "'Xs" : "'Ws";
+ }
}
-
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic.c_str(), form, suffix);
}
void Disassembler::VisitFPCompare(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "'Fn, 'Fm";
- const char *form_zero = "'Fn, #0.0";
-
- switch (instr->Mask(FPCompareMask)) {
- case FCMP_h_zero:
- case FCMP_s_zero:
- case FCMP_d_zero:
- form = form_zero;
- VIXL_FALLTHROUGH();
- case FCMP_h:
- case FCMP_s:
- case FCMP_d:
- mnemonic = "fcmp";
- break;
- case FCMPE_h_zero:
- case FCMPE_s_zero:
- case FCMPE_d_zero:
- form = form_zero;
- VIXL_FALLTHROUGH();
- case FCMPE_h:
- case FCMPE_s:
- case FCMPE_d:
- mnemonic = "fcmpe";
- break;
- default:
- form = "(FPCompare)";
+ switch (form_hash_) {
+ case "fcmpe_dz_floatcmp"_h:
+ case "fcmpe_hz_floatcmp"_h:
+ case "fcmpe_sz_floatcmp"_h:
+ case "fcmp_dz_floatcmp"_h:
+ case "fcmp_hz_floatcmp"_h:
+ case "fcmp_sz_floatcmp"_h:
+ form = "'Fn, #0.0";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitFPConditionalCompare(const Instruction *instr) {
- const char *mnemonic = "unmplemented";
- const char *form = "'Fn, 'Fm, 'INzcv, 'Cond";
-
- switch (instr->Mask(FPConditionalCompareMask)) {
- case FCCMP_h:
- case FCCMP_s:
- case FCCMP_d:
- mnemonic = "fccmp";
- break;
- case FCCMPE_h:
- case FCCMPE_s:
- case FCCMPE_d:
- mnemonic = "fccmpe";
- break;
- default:
- form = "(FPConditionalCompare)";
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Fn, 'Fm, 'INzcv, 'Cond");
}
void Disassembler::VisitFPConditionalSelect(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'Fd, 'Fn, 'Fm, 'Cond";
-
- switch (instr->Mask(FPConditionalSelectMask)) {
- case FCSEL_h:
- case FCSEL_s:
- case FCSEL_d:
- mnemonic = "fcsel";
- break;
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm, 'Cond");
}
void Disassembler::VisitFPDataProcessing1Source(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "'Fd, 'Fn";
-
- switch (instr->Mask(FPDataProcessing1SourceMask)) {
-#define FORMAT(A, B) \
- case A##_h: \
- case A##_s: \
- case A##_d: \
- mnemonic = B; \
- break;
- FORMAT(FMOV, "fmov");
- FORMAT(FABS, "fabs");
- FORMAT(FNEG, "fneg");
- FORMAT(FSQRT, "fsqrt");
- FORMAT(FRINTN, "frintn");
- FORMAT(FRINTP, "frintp");
- FORMAT(FRINTM, "frintm");
- FORMAT(FRINTZ, "frintz");
- FORMAT(FRINTA, "frinta");
- FORMAT(FRINTX, "frintx");
- FORMAT(FRINTI, "frinti");
-#undef FORMAT
-#define FORMAT(A, B) \
- case A##_s: \
- case A##_d: \
- mnemonic = B; \
- break;
- FORMAT(FRINT32X, "frint32x");
- FORMAT(FRINT32Z, "frint32z");
- FORMAT(FRINT64X, "frint64x");
- FORMAT(FRINT64Z, "frint64z");
-#undef FORMAT
- case FCVT_ds:
- mnemonic = "fcvt";
+ switch (form_hash_) {
+ case "fcvt_ds_floatdp1"_h:
form = "'Dd, 'Sn";
break;
- case FCVT_sd:
- mnemonic = "fcvt";
+ case "fcvt_sd_floatdp1"_h:
form = "'Sd, 'Dn";
break;
- case FCVT_hs:
- mnemonic = "fcvt";
+ case "fcvt_hs_floatdp1"_h:
form = "'Hd, 'Sn";
break;
- case FCVT_sh:
- mnemonic = "fcvt";
+ case "fcvt_sh_floatdp1"_h:
form = "'Sd, 'Hn";
break;
- case FCVT_dh:
- mnemonic = "fcvt";
+ case "fcvt_dh_floatdp1"_h:
form = "'Dd, 'Hn";
break;
- case FCVT_hd:
- mnemonic = "fcvt";
+ case "fcvt_hd_floatdp1"_h:
form = "'Hd, 'Dn";
break;
- default:
- form = "(FPDataProcessing1Source)";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitFPDataProcessing2Source(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'Fd, 'Fn, 'Fm";
-
- switch (instr->Mask(FPDataProcessing2SourceMask)) {
-#define FORMAT(A, B) \
- case A##_h: \
- case A##_s: \
- case A##_d: \
- mnemonic = B; \
- break;
- FORMAT(FADD, "fadd");
- FORMAT(FSUB, "fsub");
- FORMAT(FMUL, "fmul");
- FORMAT(FDIV, "fdiv");
- FORMAT(FMAX, "fmax");
- FORMAT(FMIN, "fmin");
- FORMAT(FMAXNM, "fmaxnm");
- FORMAT(FMINNM, "fminnm");
- FORMAT(FNMUL, "fnmul");
-#undef FORMAT
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm");
}
void Disassembler::VisitFPDataProcessing3Source(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'Fd, 'Fn, 'Fm, 'Fa";
-
- switch (instr->Mask(FPDataProcessing3SourceMask)) {
-#define FORMAT(A, B) \
- case A##_h: \
- case A##_s: \
- case A##_d: \
- mnemonic = B; \
- break;
- FORMAT(FMADD, "fmadd");
- FORMAT(FMSUB, "fmsub");
- FORMAT(FNMADD, "fnmadd");
- FORMAT(FNMSUB, "fnmsub");
-#undef FORMAT
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm, 'Fa");
}
void Disassembler::VisitFPImmediate(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "(FPImmediate)";
- switch (instr->Mask(FPImmediateMask)) {
- case FMOV_h_imm:
- mnemonic = "fmov";
- form = "'Hd, 'IFP";
+ const char *form = "'Hd";
+ const char *suffix = ", 'IFP";
+ switch (form_hash_) {
+ case "fmov_s_floatimm"_h:
+ form = "'Sd";
break;
- case FMOV_s_imm:
- mnemonic = "fmov";
- form = "'Sd, 'IFP";
+ case "fmov_d_floatimm"_h:
+ form = "'Dd";
break;
- case FMOV_d_imm:
- mnemonic = "fmov";
- form = "'Dd, 'IFP";
- break;
- default:
- VIXL_UNREACHABLE();
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitFPIntegerConvert(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(FPIntegerConvert)";
- const char *form_rf = "'Rd, 'Fn";
- const char *form_fr = "'Fd, 'Rn";
-
- switch (instr->Mask(FPIntegerConvertMask)) {
- case FMOV_wh:
- case FMOV_xh:
- case FMOV_ws:
- case FMOV_xd:
- mnemonic = "fmov";
- form = form_rf;
- break;
- case FMOV_hw:
- case FMOV_hx:
- case FMOV_sw:
- case FMOV_dx:
- mnemonic = "fmov";
- form = form_fr;
- break;
- case FMOV_d1_x:
- mnemonic = "fmov";
+ const char *form = "'Rd, 'Fn";
+ switch (form_hash_) {
+ case "fmov_h32_float2int"_h:
+ case "fmov_h64_float2int"_h:
+ case "fmov_s32_float2int"_h:
+ case "fmov_d64_float2int"_h:
+ case "scvtf_d32_float2int"_h:
+ case "scvtf_d64_float2int"_h:
+ case "scvtf_h32_float2int"_h:
+ case "scvtf_h64_float2int"_h:
+ case "scvtf_s32_float2int"_h:
+ case "scvtf_s64_float2int"_h:
+ case "ucvtf_d32_float2int"_h:
+ case "ucvtf_d64_float2int"_h:
+ case "ucvtf_h32_float2int"_h:
+ case "ucvtf_h64_float2int"_h:
+ case "ucvtf_s32_float2int"_h:
+ case "ucvtf_s64_float2int"_h:
+ form = "'Fd, 'Rn";
+ break;
+ case "fmov_v64i_float2int"_h:
form = "'Vd.D[1], 'Rn";
break;
- case FMOV_x_d1:
- mnemonic = "fmov";
+ case "fmov_64vx_float2int"_h:
form = "'Rd, 'Vn.D[1]";
break;
- case FCVTAS_wh:
- case FCVTAS_xh:
- case FCVTAS_ws:
- case FCVTAS_xs:
- case FCVTAS_wd:
- case FCVTAS_xd:
- mnemonic = "fcvtas";
- form = form_rf;
- break;
- case FCVTAU_wh:
- case FCVTAU_xh:
- case FCVTAU_ws:
- case FCVTAU_xs:
- case FCVTAU_wd:
- case FCVTAU_xd:
- mnemonic = "fcvtau";
- form = form_rf;
- break;
- case FCVTMS_wh:
- case FCVTMS_xh:
- case FCVTMS_ws:
- case FCVTMS_xs:
- case FCVTMS_wd:
- case FCVTMS_xd:
- mnemonic = "fcvtms";
- form = form_rf;
- break;
- case FCVTMU_wh:
- case FCVTMU_xh:
- case FCVTMU_ws:
- case FCVTMU_xs:
- case FCVTMU_wd:
- case FCVTMU_xd:
- mnemonic = "fcvtmu";
- form = form_rf;
- break;
- case FCVTNS_wh:
- case FCVTNS_xh:
- case FCVTNS_ws:
- case FCVTNS_xs:
- case FCVTNS_wd:
- case FCVTNS_xd:
- mnemonic = "fcvtns";
- form = form_rf;
- break;
- case FCVTNU_wh:
- case FCVTNU_xh:
- case FCVTNU_ws:
- case FCVTNU_xs:
- case FCVTNU_wd:
- case FCVTNU_xd:
- mnemonic = "fcvtnu";
- form = form_rf;
- break;
- case FCVTZU_wh:
- case FCVTZU_xh:
- case FCVTZU_ws:
- case FCVTZU_xs:
- case FCVTZU_wd:
- case FCVTZU_xd:
- mnemonic = "fcvtzu";
- form = form_rf;
- break;
- case FCVTZS_wh:
- case FCVTZS_xh:
- case FCVTZS_ws:
- case FCVTZS_xs:
- case FCVTZS_wd:
- case FCVTZS_xd:
- mnemonic = "fcvtzs";
- form = form_rf;
- break;
- case FCVTPU_wh:
- case FCVTPU_xh:
- case FCVTPU_xs:
- case FCVTPU_wd:
- case FCVTPU_ws:
- case FCVTPU_xd:
- mnemonic = "fcvtpu";
- form = form_rf;
- break;
- case FCVTPS_wh:
- case FCVTPS_xh:
- case FCVTPS_ws:
- case FCVTPS_xs:
- case FCVTPS_wd:
- case FCVTPS_xd:
- mnemonic = "fcvtps";
- form = form_rf;
- break;
- case SCVTF_hw:
- case SCVTF_hx:
- case SCVTF_sw:
- case SCVTF_sx:
- case SCVTF_dw:
- case SCVTF_dx:
- mnemonic = "scvtf";
- form = form_fr;
- break;
- case UCVTF_hw:
- case UCVTF_hx:
- case UCVTF_sw:
- case UCVTF_sx:
- case UCVTF_dw:
- case UCVTF_dx:
- mnemonic = "ucvtf";
- form = form_fr;
- break;
- case FJCVTZS:
- mnemonic = "fjcvtzs";
- form = form_rf;
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitFPFixedPointConvert(const Instruction *instr) {
- const char *mnemonic = "";
- const char *form = "'Rd, 'Fn, 'IFPFBits";
- const char *form_fr = "'Fd, 'Rn, 'IFPFBits";
-
- switch (instr->Mask(FPFixedPointConvertMask)) {
- case FCVTZS_wh_fixed:
- case FCVTZS_xh_fixed:
- case FCVTZS_ws_fixed:
- case FCVTZS_xs_fixed:
- case FCVTZS_wd_fixed:
- case FCVTZS_xd_fixed:
- mnemonic = "fcvtzs";
- break;
- case FCVTZU_wh_fixed:
- case FCVTZU_xh_fixed:
- case FCVTZU_ws_fixed:
- case FCVTZU_xs_fixed:
- case FCVTZU_wd_fixed:
- case FCVTZU_xd_fixed:
- mnemonic = "fcvtzu";
- break;
- case SCVTF_hw_fixed:
- case SCVTF_hx_fixed:
- case SCVTF_sw_fixed:
- case SCVTF_sx_fixed:
- case SCVTF_dw_fixed:
- case SCVTF_dx_fixed:
- mnemonic = "scvtf";
- form = form_fr;
- break;
- case UCVTF_hw_fixed:
- case UCVTF_hx_fixed:
- case UCVTF_sw_fixed:
- case UCVTF_sx_fixed:
- case UCVTF_dw_fixed:
- case UCVTF_dx_fixed:
- mnemonic = "ucvtf";
- form = form_fr;
+ const char *form = "'Rd, 'Fn";
+ const char *suffix = ", 'IFPFBits";
+
+ switch (form_hash_) {
+ case "scvtf_d32_float2fix"_h:
+ case "scvtf_d64_float2fix"_h:
+ case "scvtf_h32_float2fix"_h:
+ case "scvtf_h64_float2fix"_h:
+ case "scvtf_s32_float2fix"_h:
+ case "scvtf_s64_float2fix"_h:
+ case "ucvtf_d32_float2fix"_h:
+ case "ucvtf_d64_float2fix"_h:
+ case "ucvtf_h32_float2fix"_h:
+ case "ucvtf_h64_float2fix"_h:
+ case "ucvtf_s32_float2fix"_h:
+ case "ucvtf_s64_float2fix"_h:
+ form = "'Fd, 'Rn";
break;
- default:
- VIXL_UNREACHABLE();
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
-// clang-format off
-#define PAUTH_SYSTEM_MNEMONICS(V) \
- V(PACIA1716, "pacia1716") \
- V(PACIB1716, "pacib1716") \
- V(AUTIA1716, "autia1716") \
- V(AUTIB1716, "autib1716") \
- V(PACIAZ, "paciaz") \
- V(PACIASP, "paciasp") \
- V(PACIBZ, "pacibz") \
- V(PACIBSP, "pacibsp") \
- V(AUTIAZ, "autiaz") \
- V(AUTIASP, "autiasp") \
- V(AUTIBZ, "autibz") \
- V(AUTIBSP, "autibsp")
-// clang-format on
+void Disassembler::DisassembleNoArgs(const Instruction *instr) {
+ Format(instr, mnemonic_.c_str(), "");
+}
void Disassembler::VisitSystem(const Instruction *instr) {
- // Some system instructions hijack their Op and Cp fields to represent a
- // range of immediates instead of indicating a different instruction. This
- // makes the decoding tricky.
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "(System)";
- if (instr->GetInstructionBits() == XPACLRI) {
- mnemonic = "xpaclri";
- form = NULL;
- } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) {
- switch (instr->Mask(SystemPStateMask)) {
- case CFINV:
- mnemonic = "cfinv";
- form = NULL;
- break;
- case AXFLAG:
- mnemonic = "axflag";
- form = NULL;
- break;
- case XAFLAG:
- mnemonic = "xaflag";
- form = NULL;
- break;
- }
- } else if (instr->Mask(SystemPAuthFMask) == SystemPAuthFixed) {
- switch (instr->Mask(SystemPAuthMask)) {
-#define PAUTH_CASE(NAME, MN) \
- case NAME: \
- mnemonic = MN; \
- form = NULL; \
- break;
+ const char *suffix = NULL;
- PAUTH_SYSTEM_MNEMONICS(PAUTH_CASE)
-#undef PAUTH_CASE
- }
- } else if (instr->Mask(SystemExclusiveMonitorFMask) ==
- SystemExclusiveMonitorFixed) {
- switch (instr->Mask(SystemExclusiveMonitorMask)) {
- case CLREX: {
- mnemonic = "clrex";
- form = (instr->GetCRm() == 0xf) ? NULL : "'IX";
- break;
- }
- }
- } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
- switch (instr->Mask(SystemSysRegMask)) {
- case MRS: {
- mnemonic = "mrs";
- form = "'Xt, 'IY";
- break;
- }
- case MSR: {
- mnemonic = "msr";
- form = "'IY, 'Xt";
- break;
- }
- }
- } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
- form = NULL;
- switch (instr->GetImmHint()) {
- case NOP:
- mnemonic = "nop";
- break;
- case YIELD:
- mnemonic = "yield";
- break;
- case WFE:
- mnemonic = "wfe";
- break;
- case WFI:
- mnemonic = "wfi";
- break;
- case SEV:
- mnemonic = "sev";
- break;
- case SEVL:
- mnemonic = "sevl";
- break;
- case ESB:
- mnemonic = "esb";
- break;
- case CSDB:
- mnemonic = "csdb";
- break;
- case BTI:
- mnemonic = "bti";
- break;
- case BTI_c:
- mnemonic = "bti c";
- break;
- case BTI_j:
- mnemonic = "bti j";
- break;
- case BTI_jc:
- mnemonic = "bti jc";
- break;
- default:
- // Fall back to 'hint #<imm7>'.
- form = "'IH";
- mnemonic = "hint";
- break;
- }
- } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
- switch (instr->Mask(MemBarrierMask)) {
- case DMB: {
- mnemonic = "dmb";
- form = "'M";
- break;
- }
- case DSB: {
- mnemonic = "dsb";
- form = "'M";
- break;
- }
- case ISB: {
- mnemonic = "isb";
- form = NULL;
- break;
+ switch (form_hash_) {
+ case "clrex_bn_barriers"_h:
+ form = (instr->GetCRm() == 0xf) ? "" : "'IX";
+ break;
+ case "mrs_rs_systemmove"_h:
+ form = "'Xt, 'IY";
+ break;
+ case "msr_si_pstate"_h:
+ case "msr_sr_systemmove"_h:
+ form = "'IY, 'Xt";
+ break;
+ case "bti_hb_hints"_h:
+ switch (instr->ExtractBits(7, 6)) {
+ case 0:
+ form = "";
+ break;
+ case 1:
+ form = "c";
+ break;
+ case 2:
+ form = "j";
+ break;
+ case 3:
+ form = "jc";
+ break;
}
- }
- } else if (instr->Mask(SystemSysFMask) == SystemSysFixed) {
- switch (instr->GetSysOp()) {
- case IVAU:
- mnemonic = "ic";
- form = "ivau, 'Xt";
- break;
- case CVAC:
- mnemonic = "dc";
- form = "cvac, 'Xt";
- break;
- case CVAU:
- mnemonic = "dc";
- form = "cvau, 'Xt";
- break;
- case CVAP:
- mnemonic = "dc";
- form = "cvap, 'Xt";
- break;
- case CVADP:
- mnemonic = "dc";
- form = "cvadp, 'Xt";
- break;
- case CIVAC:
- mnemonic = "dc";
- form = "civac, 'Xt";
- break;
- case ZVA:
- mnemonic = "dc";
- form = "zva, 'Xt";
- break;
- default:
- mnemonic = "sys";
- if (instr->GetRt() == 31) {
+ break;
+ case "hint_hm_hints"_h:
+ form = "'IH";
+ break;
+ case "dmb_bo_barriers"_h:
+ case "dsb_bo_barriers"_h:
+ form = "'M";
+ break;
+ case "sys_cr_systeminstrs"_h:
+ mnemonic = "dc";
+ suffix = ", 'Xt";
+ switch (instr->GetSysOp()) {
+ case IVAU:
+ mnemonic = "ic";
+ form = "ivau";
+ break;
+ case CVAC:
+ form = "cvac";
+ break;
+ case CVAU:
+ form = "cvau";
+ break;
+ case CVAP:
+ form = "cvap";
+ break;
+ case CVADP:
+ form = "cvadp";
+ break;
+ case CIVAC:
+ form = "civac";
+ break;
+ case ZVA:
+ form = "zva";
+ break;
+ default:
+ mnemonic = "sys";
form = "'G1, 'Kn, 'Km, 'G2";
- } else {
- form = "'G1, 'Kn, 'Km, 'G2, 'Xt";
- }
- break;
- }
+ if (instr->GetRt() == 31) {
+ suffix = NULL;
+ }
+ break;
+ }
}
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic, form, suffix);
}
@@ -2341,1098 +2056,455 @@ void Disassembler::VisitCryptoAES(const Instruction *instr) {
VisitUnimplemented(instr);
}
-
-void Disassembler::VisitNEON2RegMisc(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+void Disassembler::DisassembleNEON2RegAddlp(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Vd.%s, 'Vn.%s";
- const char *form_cmp_zero = "'Vd.%s, 'Vn.%s, #0";
- const char *form_fcmp_zero = "'Vd.%s, 'Vn.%s, #0.0";
- NEONFormatDecoder nfd(instr);
static const NEONFormatMap map_lp_ta =
{{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}};
+ NEONFormatDecoder nfd(instr);
+ nfd.SetFormatMap(0, &map_lp_ta);
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+void Disassembler::DisassembleNEON2RegCompare(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, #0";
+ NEONFormatDecoder nfd(instr);
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+void Disassembler::DisassembleNEON2RegFPCompare(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, #0.0";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPFormatMap());
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+void Disassembler::DisassembleNEON2RegFPConvert(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s";
static const NEONFormatMap map_cvt_ta = {{22}, {NF_4S, NF_2D}};
static const NEONFormatMap map_cvt_tb = {{22, 30},
{NF_4H, NF_8H, NF_2S, NF_4S}};
+ NEONFormatDecoder nfd(instr, &map_cvt_tb, &map_cvt_ta);
- if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
- // These instructions all use a two bit size field, except NOT and RBIT,
- // which use the field to encode the operation.
- switch (instr->Mask(NEON2RegMiscMask)) {
- case NEON_REV64:
- mnemonic = "rev64";
- break;
- case NEON_REV32:
- mnemonic = "rev32";
- break;
- case NEON_REV16:
- mnemonic = "rev16";
- break;
- case NEON_SADDLP:
- mnemonic = "saddlp";
- nfd.SetFormatMap(0, &map_lp_ta);
- break;
- case NEON_UADDLP:
- mnemonic = "uaddlp";
- nfd.SetFormatMap(0, &map_lp_ta);
- break;
- case NEON_SUQADD:
- mnemonic = "suqadd";
- break;
- case NEON_USQADD:
- mnemonic = "usqadd";
- break;
- case NEON_CLS:
- mnemonic = "cls";
- break;
- case NEON_CLZ:
- mnemonic = "clz";
- break;
- case NEON_CNT:
- mnemonic = "cnt";
- break;
- case NEON_SADALP:
- mnemonic = "sadalp";
- nfd.SetFormatMap(0, &map_lp_ta);
- break;
- case NEON_UADALP:
- mnemonic = "uadalp";
- nfd.SetFormatMap(0, &map_lp_ta);
- break;
- case NEON_SQABS:
- mnemonic = "sqabs";
- break;
- case NEON_SQNEG:
- mnemonic = "sqneg";
- break;
- case NEON_CMGT_zero:
- mnemonic = "cmgt";
- form = form_cmp_zero;
- break;
- case NEON_CMGE_zero:
- mnemonic = "cmge";
- form = form_cmp_zero;
- break;
- case NEON_CMEQ_zero:
- mnemonic = "cmeq";
- form = form_cmp_zero;
- break;
- case NEON_CMLE_zero:
- mnemonic = "cmle";
- form = form_cmp_zero;
- break;
- case NEON_CMLT_zero:
- mnemonic = "cmlt";
- form = form_cmp_zero;
+ VectorFormat vform_dst = nfd.GetVectorFormat(0);
+ switch (form_hash_) {
+ case "fcvtl_asimdmisc_l"_h:
+ nfd.SetFormatMaps(&map_cvt_ta, &map_cvt_tb);
+ break;
+ case "fcvtxn_asimdmisc_n"_h:
+ if ((vform_dst != kFormat2S) && (vform_dst != kFormat4S)) {
+ mnemonic = NULL;
+ }
+ break;
+ }
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+}
+
+void Disassembler::DisassembleNEON2RegFP(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPFormatMap());
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+void Disassembler::DisassembleNEON2RegLogical(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+ if (form_hash_ == "not_asimdmisc_r"_h) {
+ mnemonic = "mvn";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
+void Disassembler::DisassembleNEON2RegExtract(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s";
+ const char *suffix = NULL;
+ NEONFormatDecoder nfd(instr,
+ NEONFormatDecoder::IntegerFormatMap(),
+ NEONFormatDecoder::LongIntegerFormatMap());
+
+ if (form_hash_ == "shll_asimdmisc_s"_h) {
+ nfd.SetFormatMaps(nfd.LongIntegerFormatMap(), nfd.IntegerFormatMap());
+ switch (instr->GetNEONSize()) {
+ case 0:
+ suffix = ", #8";
break;
- case NEON_ABS:
- mnemonic = "abs";
+ case 1:
+ suffix = ", #16";
break;
- case NEON_NEG:
- mnemonic = "neg";
- break;
- case NEON_RBIT_NOT:
- switch (instr->GetFPType()) {
- case 0:
- mnemonic = "mvn";
- break;
- case 1:
- mnemonic = "rbit";
- break;
- default:
- form = "(NEON2RegMisc)";
- }
- nfd.SetFormatMaps(nfd.LogicalFormatMap());
+ case 2:
+ suffix = ", #32";
break;
}
- } else {
- // These instructions all use a one bit size field, except XTN, SQXTUN,
- // SHLL, SQXTN and UQXTN, which use a two bit size field.
- nfd.SetFormatMaps(nfd.FPFormatMap());
- switch (instr->Mask(NEON2RegMiscFPMask)) {
- case NEON_FABS:
- mnemonic = "fabs";
- break;
- case NEON_FNEG:
- mnemonic = "fneg";
- break;
- case NEON_FCVTN:
- mnemonic = instr->Mask(NEON_Q) ? "fcvtn2" : "fcvtn";
- nfd.SetFormatMap(0, &map_cvt_tb);
- nfd.SetFormatMap(1, &map_cvt_ta);
- break;
- case NEON_FCVTXN:
- mnemonic = instr->Mask(NEON_Q) ? "fcvtxn2" : "fcvtxn";
- nfd.SetFormatMap(0, &map_cvt_tb);
- nfd.SetFormatMap(1, &map_cvt_ta);
- break;
- case NEON_FCVTL:
- mnemonic = instr->Mask(NEON_Q) ? "fcvtl2" : "fcvtl";
- nfd.SetFormatMap(0, &map_cvt_ta);
- nfd.SetFormatMap(1, &map_cvt_tb);
- break;
- case NEON_FRINT32X:
- mnemonic = "frint32x";
- break;
- case NEON_FRINT32Z:
- mnemonic = "frint32z";
- break;
- case NEON_FRINT64X:
- mnemonic = "frint64x";
- break;
- case NEON_FRINT64Z:
- mnemonic = "frint64z";
- break;
- case NEON_FRINTN:
- mnemonic = "frintn";
- break;
- case NEON_FRINTA:
- mnemonic = "frinta";
- break;
- case NEON_FRINTP:
- mnemonic = "frintp";
- break;
- case NEON_FRINTM:
- mnemonic = "frintm";
- break;
- case NEON_FRINTX:
- mnemonic = "frintx";
- break;
- case NEON_FRINTZ:
- mnemonic = "frintz";
- break;
- case NEON_FRINTI:
- mnemonic = "frinti";
- break;
- case NEON_FCVTNS:
- mnemonic = "fcvtns";
- break;
- case NEON_FCVTNU:
- mnemonic = "fcvtnu";
- break;
- case NEON_FCVTPS:
- mnemonic = "fcvtps";
- break;
- case NEON_FCVTPU:
- mnemonic = "fcvtpu";
- break;
- case NEON_FCVTMS:
- mnemonic = "fcvtms";
- break;
- case NEON_FCVTMU:
- mnemonic = "fcvtmu";
- break;
- case NEON_FCVTZS:
- mnemonic = "fcvtzs";
- break;
- case NEON_FCVTZU:
- mnemonic = "fcvtzu";
- break;
- case NEON_FCVTAS:
- mnemonic = "fcvtas";
- break;
- case NEON_FCVTAU:
- mnemonic = "fcvtau";
- break;
- case NEON_FSQRT:
- mnemonic = "fsqrt";
- break;
- case NEON_SCVTF:
- mnemonic = "scvtf";
- break;
- case NEON_UCVTF:
- mnemonic = "ucvtf";
- break;
- case NEON_URSQRTE:
- mnemonic = "ursqrte";
- break;
- case NEON_URECPE:
- mnemonic = "urecpe";
- break;
- case NEON_FRSQRTE:
- mnemonic = "frsqrte";
- break;
- case NEON_FRECPE:
- mnemonic = "frecpe";
- break;
- case NEON_FCMGT_zero:
- mnemonic = "fcmgt";
- form = form_fcmp_zero;
- break;
- case NEON_FCMGE_zero:
- mnemonic = "fcmge";
- form = form_fcmp_zero;
- break;
- case NEON_FCMEQ_zero:
- mnemonic = "fcmeq";
- form = form_fcmp_zero;
- break;
- case NEON_FCMLE_zero:
- mnemonic = "fcmle";
- form = form_fcmp_zero;
+ }
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form), suffix);
+}
+
+void Disassembler::VisitNEON2RegMisc(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s";
+ NEONFormatDecoder nfd(instr);
+
+ VectorFormat vform_dst = nfd.GetVectorFormat(0);
+ if (vform_dst != kFormatUndefined) {
+ uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst);
+ switch (form_hash_) {
+ case "cnt_asimdmisc_r"_h:
+ case "rev16_asimdmisc_r"_h:
+ if (ls_dst != kBRegSize) {
+ mnemonic = NULL;
+ }
break;
- case NEON_FCMLT_zero:
- mnemonic = "fcmlt";
- form = form_fcmp_zero;
+ case "rev32_asimdmisc_r"_h:
+ if ((ls_dst == kDRegSize) || (ls_dst == kSRegSize)) {
+ mnemonic = NULL;
+ }
break;
- default:
- if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
- (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
- nfd.SetFormatMap(0, nfd.IntegerFormatMap());
- nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
-
- switch (instr->Mask(NEON2RegMiscMask)) {
- case NEON_XTN:
- mnemonic = "xtn";
- break;
- case NEON_SQXTN:
- mnemonic = "sqxtn";
- break;
- case NEON_UQXTN:
- mnemonic = "uqxtn";
- break;
- case NEON_SQXTUN:
- mnemonic = "sqxtun";
- break;
- case NEON_SHLL:
- mnemonic = "shll";
- nfd.SetFormatMap(0, nfd.LongIntegerFormatMap());
- nfd.SetFormatMap(1, nfd.IntegerFormatMap());
- switch (instr->GetNEONSize()) {
- case 0:
- form = "'Vd.%s, 'Vn.%s, #8";
- break;
- case 1:
- form = "'Vd.%s, 'Vn.%s, #16";
- break;
- case 2:
- form = "'Vd.%s, 'Vn.%s, #32";
- break;
- default:
- Format(instr, "unallocated", "(NEON2RegMisc)");
- return;
- }
- }
- Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
- return;
- } else {
- form = "(NEON2RegMisc)";
+ case "urecpe_asimdmisc_r"_h:
+ case "ursqrte_asimdmisc_r"_h:
+ // For urecpe and ursqrte, only S-sized elements are supported. The MSB
+ // of the size field is always set by the instruction (0b1x) so we need
+ // only check and discard D-sized elements here.
+ VIXL_ASSERT((ls_dst == kSRegSize) || (ls_dst == kDRegSize));
+ VIXL_FALLTHROUGH();
+ case "clz_asimdmisc_r"_h:
+ case "cls_asimdmisc_r"_h:
+ case "rev64_asimdmisc_r"_h:
+ if (ls_dst == kDRegSize) {
+ mnemonic = NULL;
}
+ break;
}
}
+
Format(instr, mnemonic, nfd.Substitute(form));
}
void Disassembler::VisitNEON2RegMiscFP16(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Vd.%s, 'Vn.%s";
- const char *form_cmp = "'Vd.%s, 'Vn.%s, #0.0";
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.'?30:84h, 'Vn.'?30:84h";
+ const char *suffix = NULL;
- static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
- NEONFormatDecoder nfd(instr, &map_half);
+ switch (form_hash_) {
+ case "fcmeq_asimdmiscfp16_fz"_h:
+ case "fcmge_asimdmiscfp16_fz"_h:
+ case "fcmgt_asimdmiscfp16_fz"_h:
+ case "fcmle_asimdmiscfp16_fz"_h:
+ case "fcmlt_asimdmiscfp16_fz"_h:
+ suffix = ", #0.0";
+ }
+ Format(instr, mnemonic, form, suffix);
+}
- switch (instr->Mask(NEON2RegMiscFP16Mask)) {
-// clang-format off
-#define FORMAT(A, B) \
- case NEON_##A##_H: \
- mnemonic = B; \
- break;
- FORMAT(FABS, "fabs")
- FORMAT(FCVTAS, "fcvtas")
- FORMAT(FCVTAU, "fcvtau")
- FORMAT(FCVTMS, "fcvtms")
- FORMAT(FCVTMU, "fcvtmu")
- FORMAT(FCVTNS, "fcvtns")
- FORMAT(FCVTNU, "fcvtnu")
- FORMAT(FCVTPS, "fcvtps")
- FORMAT(FCVTPU, "fcvtpu")
- FORMAT(FCVTZS, "fcvtzs")
- FORMAT(FCVTZU, "fcvtzu")
- FORMAT(FNEG, "fneg")
- FORMAT(FRECPE, "frecpe")
- FORMAT(FRINTA, "frinta")
- FORMAT(FRINTI, "frinti")
- FORMAT(FRINTM, "frintm")
- FORMAT(FRINTN, "frintn")
- FORMAT(FRINTP, "frintp")
- FORMAT(FRINTX, "frintx")
- FORMAT(FRINTZ, "frintz")
- FORMAT(FRSQRTE, "frsqrte")
- FORMAT(FSQRT, "fsqrt")
- FORMAT(SCVTF, "scvtf")
- FORMAT(UCVTF, "ucvtf")
-// clang-format on
-#undef FORMAT
+void Disassembler::DisassembleNEON3SameLogical(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
- case NEON_FCMEQ_H_zero:
- mnemonic = "fcmeq";
- form = form_cmp;
- break;
- case NEON_FCMGT_H_zero:
- mnemonic = "fcmgt";
- form = form_cmp;
- break;
- case NEON_FCMGE_H_zero:
- mnemonic = "fcmge";
- form = form_cmp;
- break;
- case NEON_FCMLT_H_zero:
- mnemonic = "fcmlt";
- form = form_cmp;
- break;
- case NEON_FCMLE_H_zero:
- mnemonic = "fcmle";
- form = form_cmp;
+ switch (form_hash_) {
+ case "orr_asimdsame_only"_h:
+ if (instr->GetRm() == instr->GetRn()) {
+ mnemonic = "mov";
+ form = "'Vd.%s, 'Vn.%s";
+ }
break;
- default:
- form = "(NEON2RegMiscFP16)";
+ case "pmul_asimdsame_only"_h:
+ if (instr->GetNEONSize() != 0) {
+ mnemonic = NULL;
+ }
}
Format(instr, mnemonic, nfd.Substitute(form));
}
+void Disassembler::DisassembleNEON3SameFHM(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr, "'Vd.'?30:42s, 'Vn.'?30:42h, 'Vm.'?30:42h");
+}
+
+void Disassembler::DisassembleNEON3SameNoD(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+ static const NEONFormatMap map =
+ {{23, 22, 30},
+ {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_UNDEF}};
+ NEONFormatDecoder nfd(instr, &map);
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
void Disassembler::VisitNEON3Same(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
NEONFormatDecoder nfd(instr);
- if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
- switch (instr->Mask(NEON3SameLogicalMask)) {
- case NEON_AND:
- mnemonic = "and";
- break;
- case NEON_ORR:
- mnemonic = "orr";
- if (instr->GetRm() == instr->GetRn()) {
- mnemonic = "mov";
- form = "'Vd.%s, 'Vn.%s";
+ if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
+ nfd.SetFormatMaps(nfd.FPFormatMap());
+ }
+
+ VectorFormat vform_dst = nfd.GetVectorFormat(0);
+ if (vform_dst != kFormatUndefined) {
+ uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst);
+ switch (form_hash_) {
+ case "sqdmulh_asimdsame_only"_h:
+ case "sqrdmulh_asimdsame_only"_h:
+ if ((ls_dst == kBRegSize) || (ls_dst == kDRegSize)) {
+ mnemonic = NULL;
}
break;
- case NEON_ORN:
- mnemonic = "orn";
- break;
- case NEON_EOR:
- mnemonic = "eor";
- break;
- case NEON_BIC:
- mnemonic = "bic";
- break;
- case NEON_BIF:
- mnemonic = "bif";
- break;
- case NEON_BIT:
- mnemonic = "bit";
- break;
- case NEON_BSL:
- mnemonic = "bsl";
- break;
- default:
- form = "(NEON3Same)";
- }
- nfd.SetFormatMaps(nfd.LogicalFormatMap());
- } else {
- static const char kUnknown[] = "unallocated";
- static const char *mnemonics[] = {"shadd",
- "uhadd",
- "shadd",
- "uhadd",
- "sqadd",
- "uqadd",
- "sqadd",
- "uqadd",
- "srhadd",
- "urhadd",
- "srhadd",
- "urhadd",
- // Handled by logical cases above.
- NULL,
- NULL,
- NULL,
- NULL,
- "shsub",
- "uhsub",
- "shsub",
- "uhsub",
- "sqsub",
- "uqsub",
- "sqsub",
- "uqsub",
- "cmgt",
- "cmhi",
- "cmgt",
- "cmhi",
- "cmge",
- "cmhs",
- "cmge",
- "cmhs",
- "sshl",
- "ushl",
- "sshl",
- "ushl",
- "sqshl",
- "uqshl",
- "sqshl",
- "uqshl",
- "srshl",
- "urshl",
- "srshl",
- "urshl",
- "sqrshl",
- "uqrshl",
- "sqrshl",
- "uqrshl",
- "smax",
- "umax",
- "smax",
- "umax",
- "smin",
- "umin",
- "smin",
- "umin",
- "sabd",
- "uabd",
- "sabd",
- "uabd",
- "saba",
- "uaba",
- "saba",
- "uaba",
- "add",
- "sub",
- "add",
- "sub",
- "cmtst",
- "cmeq",
- "cmtst",
- "cmeq",
- "mla",
- "mls",
- "mla",
- "mls",
- "mul",
- "pmul",
- "mul",
- "pmul",
- "smaxp",
- "umaxp",
- "smaxp",
- "umaxp",
- "sminp",
- "uminp",
- "sminp",
- "uminp",
- "sqdmulh",
- "sqrdmulh",
- "sqdmulh",
- "sqrdmulh",
- "addp",
- kUnknown,
- "addp",
- kUnknown,
- "fmaxnm",
- "fmaxnmp",
- "fminnm",
- "fminnmp",
- "fmla",
- kUnknown, // FMLAL2 or unallocated
- "fmls",
- kUnknown, // FMLSL2 or unallocated
- "fadd",
- "faddp",
- "fsub",
- "fabd",
- "fmulx",
- "fmul",
- kUnknown,
- kUnknown,
- "fcmeq",
- "fcmge",
- kUnknown,
- "fcmgt",
- kUnknown, // FMLAL or unallocated
- "facge",
- kUnknown, // FMLSL or unallocated
- "facgt",
- "fmax",
- "fmaxp",
- "fmin",
- "fminp",
- "frecps",
- "fdiv",
- "frsqrts",
- kUnknown};
-
- // Operation is determined by the opcode bits (15-11), the top bit of
- // size (23) and the U bit (29).
- unsigned index = (instr->ExtractBits(15, 11) << 2) |
- (instr->ExtractBit(23) << 1) | instr->ExtractBit(29);
- VIXL_ASSERT(index < ArrayLength(mnemonics));
- mnemonic = mnemonics[index];
- // Assert that index is not one of the previously handled logical
- // instructions.
- VIXL_ASSERT(mnemonic != NULL);
-
- if (mnemonic == kUnknown) {
- // Catch special cases where we need to check more bits than we have in
- // the table index. Anything not matched here is unallocated.
-
- const char *fhm_form = (instr->Mask(NEON_Q) == 0)
- ? "'Vd.2s, 'Vn.2h, 'Vm.2h"
- : "'Vd.4s, 'Vn.4h, 'Vm.4h";
- switch (instr->Mask(NEON3SameFHMMask)) {
- case NEON_FMLAL:
- mnemonic = "fmlal";
- form = fhm_form;
- break;
- case NEON_FMLAL2:
- mnemonic = "fmlal2";
- form = fhm_form;
- break;
- case NEON_FMLSL:
- mnemonic = "fmlsl";
- form = fhm_form;
- break;
- case NEON_FMLSL2:
- mnemonic = "fmlsl2";
- form = fhm_form;
- break;
- default:
- VIXL_ASSERT(strcmp(mnemonic, "unallocated") == 0);
- form = "(NEON3Same)";
- break;
- }
- }
-
- if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
- nfd.SetFormatMaps(nfd.FPFormatMap());
}
}
Format(instr, mnemonic, nfd.Substitute(form));
}
void Disassembler::VisitNEON3SameFP16(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
-
NEONFormatDecoder nfd(instr);
nfd.SetFormatMaps(nfd.FP16FormatMap());
-
- switch (instr->Mask(NEON3SameFP16Mask)) {
-#define FORMAT(A, B) \
- case NEON_##A##_H: \
- mnemonic = B; \
- break;
- FORMAT(FMAXNM, "fmaxnm");
- FORMAT(FMLA, "fmla");
- FORMAT(FADD, "fadd");
- FORMAT(FMULX, "fmulx");
- FORMAT(FCMEQ, "fcmeq");
- FORMAT(FMAX, "fmax");
- FORMAT(FRECPS, "frecps");
- FORMAT(FMINNM, "fminnm");
- FORMAT(FMLS, "fmls");
- FORMAT(FSUB, "fsub");
- FORMAT(FMIN, "fmin");
- FORMAT(FRSQRTS, "frsqrts");
- FORMAT(FMAXNMP, "fmaxnmp");
- FORMAT(FADDP, "faddp");
- FORMAT(FMUL, "fmul");
- FORMAT(FCMGE, "fcmge");
- FORMAT(FACGE, "facge");
- FORMAT(FMAXP, "fmaxp");
- FORMAT(FDIV, "fdiv");
- FORMAT(FMINNMP, "fminnmp");
- FORMAT(FABD, "fabd");
- FORMAT(FCMGT, "fcmgt");
- FORMAT(FACGT, "facgt");
- FORMAT(FMINP, "fminp");
-#undef FORMAT
- default:
- form = "(NEON3SameFP16)";
- }
-
Format(instr, mnemonic, nfd.Substitute(form));
}
void Disassembler::VisitNEON3SameExtra(const Instruction *instr) {
static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}};
- const char *mnemonic = "unallocated";
- const char *form = "(NEON3SameExtra)";
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+ const char *suffix = NULL;
NEONFormatDecoder nfd(instr);
- if (instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) {
- mnemonic = "fcmla";
- form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVFCNM";
- } else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) {
- mnemonic = "fcadd";
- form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVFCNA";
- } else {
- form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
- switch (instr->Mask(NEON3SameExtraMask)) {
- case NEON_SDOT:
- mnemonic = "sdot";
- nfd.SetFormatMap(1, &map_usdot);
- nfd.SetFormatMap(2, &map_usdot);
- break;
- case NEON_SQRDMLAH:
- mnemonic = "sqrdmlah";
- break;
- case NEON_UDOT:
- mnemonic = "udot";
- nfd.SetFormatMap(1, &map_usdot);
- nfd.SetFormatMap(2, &map_usdot);
- break;
- case NEON_SQRDMLSH:
- mnemonic = "sqrdmlsh";
- break;
- }
+ switch (form_hash_) {
+ case "fcmla_asimdsame2_c"_h:
+ suffix = ", #'u1211*90";
+ break;
+ case "fcadd_asimdsame2_c"_h:
+ // Bit 10 is always set, so this gives 90 * 1 or 3.
+ suffix = ", #'u1212:1010*90";
+ break;
+ case "sdot_asimdsame2_d"_h:
+ case "udot_asimdsame2_d"_h:
+ case "usdot_asimdsame2_d"_h:
+ nfd.SetFormatMap(1, &map_usdot);
+ nfd.SetFormatMap(2, &map_usdot);
+ break;
+ default:
+ // sqrdml[as]h - nothing to do.
+ break;
}
- Format(instr, mnemonic, nfd.Substitute(form));
+ Format(instr, mnemonic, nfd.Substitute(form), suffix);
}
void Disassembler::VisitNEON3Different(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
NEONFormatDecoder nfd(instr);
nfd.SetFormatMap(0, nfd.LongIntegerFormatMap());
- // Ignore the Q bit. Appending a "2" suffix is handled later.
- switch (instr->Mask(NEON3DifferentMask) & ~NEON_Q) {
- case NEON_PMULL:
- mnemonic = "pmull";
- break;
- case NEON_SABAL:
- mnemonic = "sabal";
- break;
- case NEON_SABDL:
- mnemonic = "sabdl";
- break;
- case NEON_SADDL:
- mnemonic = "saddl";
- break;
- case NEON_SMLAL:
- mnemonic = "smlal";
- break;
- case NEON_SMLSL:
- mnemonic = "smlsl";
- break;
- case NEON_SMULL:
- mnemonic = "smull";
- break;
- case NEON_SSUBL:
- mnemonic = "ssubl";
- break;
- case NEON_SQDMLAL:
- mnemonic = "sqdmlal";
- break;
- case NEON_SQDMLSL:
- mnemonic = "sqdmlsl";
- break;
- case NEON_SQDMULL:
- mnemonic = "sqdmull";
- break;
- case NEON_UABAL:
- mnemonic = "uabal";
- break;
- case NEON_UABDL:
- mnemonic = "uabdl";
- break;
- case NEON_UADDL:
- mnemonic = "uaddl";
- break;
- case NEON_UMLAL:
- mnemonic = "umlal";
- break;
- case NEON_UMLSL:
- mnemonic = "umlsl";
- break;
- case NEON_UMULL:
- mnemonic = "umull";
- break;
- case NEON_USUBL:
- mnemonic = "usubl";
- break;
- case NEON_SADDW:
- mnemonic = "saddw";
- nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
- break;
- case NEON_SSUBW:
- mnemonic = "ssubw";
- nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
- break;
- case NEON_UADDW:
- mnemonic = "uaddw";
+ switch (form_hash_) {
+ case "saddw_asimddiff_w"_h:
+ case "ssubw_asimddiff_w"_h:
+ case "uaddw_asimddiff_w"_h:
+ case "usubw_asimddiff_w"_h:
nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
break;
- case NEON_USUBW:
- mnemonic = "usubw";
- nfd.SetFormatMap(1, nfd.LongIntegerFormatMap());
- break;
- case NEON_ADDHN:
- mnemonic = "addhn";
- nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
- nfd.SetFormatMap(0, nfd.IntegerFormatMap());
- break;
- case NEON_RADDHN:
- mnemonic = "raddhn";
+ case "addhn_asimddiff_n"_h:
+ case "raddhn_asimddiff_n"_h:
+ case "rsubhn_asimddiff_n"_h:
+ case "subhn_asimddiff_n"_h:
nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
nfd.SetFormatMap(0, nfd.IntegerFormatMap());
break;
- case NEON_RSUBHN:
- mnemonic = "rsubhn";
- nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
- nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ case "pmull_asimddiff_l"_h:
+ if (nfd.GetVectorFormat(0) != kFormat8H) {
+ mnemonic = NULL;
+ }
break;
- case NEON_SUBHN:
- mnemonic = "subhn";
- nfd.SetFormatMaps(nfd.LongIntegerFormatMap());
- nfd.SetFormatMap(0, nfd.IntegerFormatMap());
+ case "sqdmlal_asimddiff_l"_h:
+ case "sqdmlsl_asimddiff_l"_h:
+ case "sqdmull_asimddiff_l"_h:
+ if (nfd.GetVectorFormat(0) == kFormat8H) {
+ mnemonic = NULL;
+ }
break;
- default:
- form = "(NEON3Different)";
}
Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
}
+void Disassembler::DisassembleNEONFPAcrossLanes(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Sd, 'Vn.4s";
+ if ((instr->GetNEONQ() == 0) || (instr->ExtractBit(22) == 1)) {
+ mnemonic = NULL;
+ }
+ Format(instr, mnemonic, form);
+}
+
+void Disassembler::DisassembleNEONFP16AcrossLanes(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr, "'Hd, 'Vn.'?30:84h");
+}
void Disassembler::VisitNEONAcrossLanes(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "%sd, 'Vn.%s";
- const char *form_half = "'Hd, 'Vn.%s";
- bool half_op = false;
- static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
NEONFormatDecoder nfd(instr,
NEONFormatDecoder::ScalarFormatMap(),
NEONFormatDecoder::IntegerFormatMap());
- if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
- half_op = true;
- form = form_half;
- nfd.SetFormatMaps(&map_half);
- switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
- case NEON_FMAXV_H:
- mnemonic = "fmaxv";
- break;
- case NEON_FMINV_H:
- mnemonic = "fminv";
- break;
- case NEON_FMAXNMV_H:
- mnemonic = "fmaxnmv";
- break;
- case NEON_FMINNMV_H:
- mnemonic = "fminnmv";
- break;
- }
- } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
- nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
- nfd.SetFormatMap(1, nfd.FPFormatMap());
- switch (instr->Mask(NEONAcrossLanesFPMask)) {
- case NEON_FMAXV:
- mnemonic = "fmaxv";
- break;
- case NEON_FMINV:
- mnemonic = "fminv";
- break;
- case NEON_FMAXNMV:
- mnemonic = "fmaxnmv";
- break;
- case NEON_FMINNMV:
- mnemonic = "fminnmv";
- break;
- default:
- form = "(NEONAcrossLanes)";
- break;
- }
- } else if (instr->Mask(NEONAcrossLanesFMask) == NEONAcrossLanesFixed) {
- switch (instr->Mask(NEONAcrossLanesMask)) {
- case NEON_ADDV:
- mnemonic = "addv";
- break;
- case NEON_SMAXV:
- mnemonic = "smaxv";
- break;
- case NEON_SMINV:
- mnemonic = "sminv";
- break;
- case NEON_UMAXV:
- mnemonic = "umaxv";
- break;
- case NEON_UMINV:
- mnemonic = "uminv";
- break;
- case NEON_SADDLV:
- mnemonic = "saddlv";
- nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
- break;
- case NEON_UADDLV:
- mnemonic = "uaddlv";
- nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
- break;
- default:
- form = "(NEONAcrossLanes)";
- break;
- }
+ switch (form_hash_) {
+ case "saddlv_asimdall_only"_h:
+ case "uaddlv_asimdall_only"_h:
+ nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
}
- if (half_op) {
- Format(instr, mnemonic, nfd.Substitute(form));
- } else {
- Format(instr,
- mnemonic,
- nfd.Substitute(form,
- NEONFormatDecoder::kPlaceholder,
- NEONFormatDecoder::kFormat));
+ VectorFormat vform_src = nfd.GetVectorFormat(1);
+ if ((vform_src == kFormat2S) || (vform_src == kFormat2D)) {
+ mnemonic = NULL;
}
-}
+ Format(instr,
+ mnemonic,
+ nfd.Substitute(form,
+ NEONFormatDecoder::kPlaceholder,
+ NEONFormatDecoder::kFormat));
+}
void Disassembler::VisitNEONByIndexedElement(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- bool l_instr = false;
- bool fp_instr = false;
- bool cn_instr = false;
- bool half_instr = false;
- bool fhm_instr = false; // FMLAL{2}, FMLSL{2}
-
const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
-
- static const NEONFormatMap map_ta = {{23, 22}, {NF_UNDEF, NF_4S, NF_2D}};
- static const NEONFormatMap map_cn =
+ static const NEONFormatMap map_v =
{{23, 22, 30},
- {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_UNDEF, NF_4S, NF_UNDEF, NF_UNDEF}};
- static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}};
- static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
+ {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_UNDEF}};
+ static const NEONFormatMap map_s = {{23, 22},
+ {NF_UNDEF, NF_H, NF_S, NF_UNDEF}};
+ NEONFormatDecoder nfd(instr, &map_v, &map_v, &map_s);
+ Format(instr, mnemonic_.c_str(), nfd.Substitute(form));
+}
+void Disassembler::DisassembleNEONMulByElementLong(const Instruction *instr) {
+ const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+ // TODO: Disallow undefined element types for this instruction.
+ static const NEONFormatMap map_ta = {{23, 22}, {NF_UNDEF, NF_4S, NF_2D}};
NEONFormatDecoder nfd(instr,
&map_ta,
NEONFormatDecoder::IntegerFormatMap(),
NEONFormatDecoder::ScalarFormatMap());
+ Format(instr, nfd.Mnemonic(mnemonic_.c_str()), nfd.Substitute(form));
+}
- switch (instr->Mask(NEONByIndexedElementMask)) {
- case NEON_SMULL_byelement:
- mnemonic = "smull";
- l_instr = true;
- break;
- case NEON_UMULL_byelement:
- mnemonic = "umull";
- l_instr = true;
- break;
- case NEON_SMLAL_byelement:
- mnemonic = "smlal";
- l_instr = true;
- break;
- case NEON_UMLAL_byelement:
- mnemonic = "umlal";
- l_instr = true;
- break;
- case NEON_SMLSL_byelement:
- mnemonic = "smlsl";
- l_instr = true;
- break;
- case NEON_UMLSL_byelement:
- mnemonic = "umlsl";
- l_instr = true;
- break;
- case NEON_SQDMULL_byelement:
- mnemonic = "sqdmull";
- l_instr = true;
- break;
- case NEON_SQDMLAL_byelement:
- mnemonic = "sqdmlal";
- l_instr = true;
- break;
- case NEON_SQDMLSL_byelement:
- mnemonic = "sqdmlsl";
- l_instr = true;
- break;
- case NEON_MUL_byelement:
- mnemonic = "mul";
- break;
- case NEON_MLA_byelement:
- mnemonic = "mla";
- break;
- case NEON_MLS_byelement:
- mnemonic = "mls";
- break;
- case NEON_SQDMULH_byelement:
- mnemonic = "sqdmulh";
- break;
- case NEON_SQRDMULH_byelement:
- mnemonic = "sqrdmulh";
- break;
- case NEON_SDOT_byelement:
- mnemonic = "sdot";
- form = "'Vd.%s, 'Vn.%s, 'Ve.4b['IVByElemIndex]";
- nfd.SetFormatMap(1, &map_usdot);
- break;
- case NEON_SQRDMLAH_byelement:
- mnemonic = "sqrdmlah";
- break;
- case NEON_UDOT_byelement:
- mnemonic = "udot";
- form = "'Vd.%s, 'Vn.%s, 'Ve.4b['IVByElemIndex]";
- nfd.SetFormatMap(1, &map_usdot);
- break;
- case NEON_SQRDMLSH_byelement:
- mnemonic = "sqrdmlsh";
- break;
- default: {
- switch (instr->Mask(NEONByIndexedElementFPLongMask)) {
- case NEON_FMLAL_H_byelement:
- mnemonic = "fmlal";
- fhm_instr = true;
- break;
- case NEON_FMLAL2_H_byelement:
- mnemonic = "fmlal2";
- fhm_instr = true;
- break;
- case NEON_FMLSL_H_byelement:
- mnemonic = "fmlsl";
- fhm_instr = true;
- break;
- case NEON_FMLSL2_H_byelement:
- mnemonic = "fmlsl2";
- fhm_instr = true;
- break;
- default:
- switch (instr->Mask(NEONByIndexedElementFPMask)) {
- case NEON_FMUL_byelement:
- mnemonic = "fmul";
- fp_instr = true;
- break;
- case NEON_FMLA_byelement:
- mnemonic = "fmla";
- fp_instr = true;
- break;
- case NEON_FMLS_byelement:
- mnemonic = "fmls";
- fp_instr = true;
- break;
- case NEON_FMULX_byelement:
- mnemonic = "fmulx";
- fp_instr = true;
- break;
- case NEON_FMLA_H_byelement:
- mnemonic = "fmla";
- half_instr = true;
- break;
- case NEON_FMLS_H_byelement:
- mnemonic = "fmls";
- half_instr = true;
- break;
- case NEON_FMUL_H_byelement:
- mnemonic = "fmul";
- half_instr = true;
- break;
- case NEON_FMULX_H_byelement:
- mnemonic = "fmulx";
- half_instr = true;
- break;
- default:
- switch (instr->Mask(NEONByIndexedElementFPComplexMask)) {
- case NEON_FCMLA_byelement:
- mnemonic = "fcmla";
- cn_instr = true;
- form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndexRot], 'ILFCNR";
- break;
- }
- }
- }
- }
- }
+void Disassembler::DisassembleNEONDotProdByElement(const Instruction *instr) {
+ const char *form = instr->ExtractBit(30) ? "'Vd.4s, 'Vn.16" : "'Vd.2s, 'Vn.8";
+ const char *suffix = "b, 'Vm.4b['u1111:2121]";
+ Format(instr, mnemonic_.c_str(), form, suffix);
+}
- if (fhm_instr) {
- // These are oddballs. Set the format manually.
- form = (instr->Mask(NEON_Q) == 0)
- ? "'Vd.2s, 'Vn.2h, 'Ve.h['IVByElemIndexFHM]"
- : "'Vd.4s, 'Vn.4h, 'Ve.h['IVByElemIndexFHM]";
- Format(instr, mnemonic, nfd.Substitute(form));
- } else if (half_instr) {
- form = "'Vd.%s, 'Vn.%s, 'Ve.h['IVByElemIndex]";
- nfd.SetFormatMaps(&map_half, &map_half);
- Format(instr, mnemonic, nfd.Substitute(form));
- } else if (l_instr) {
- Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
- } else if (fp_instr) {
- nfd.SetFormatMap(0, nfd.FPFormatMap());
- Format(instr, mnemonic, nfd.Substitute(form));
- } else if (cn_instr) {
- nfd.SetFormatMap(0, &map_cn);
- nfd.SetFormatMap(1, &map_cn);
- Format(instr, mnemonic, nfd.Substitute(form));
- } else {
- nfd.SetFormatMap(0, nfd.IntegerFormatMap());
- Format(instr, mnemonic, nfd.Substitute(form));
- }
+void Disassembler::DisassembleNEONFPMulByElement(const Instruction *instr) {
+ const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
+ NEONFormatDecoder nfd(instr,
+ NEONFormatDecoder::FPFormatMap(),
+ NEONFormatDecoder::FPFormatMap(),
+ NEONFormatDecoder::FPScalarFormatMap());
+ Format(instr, mnemonic_.c_str(), nfd.Substitute(form));
}
+void Disassembler::DisassembleNEONHalfFPMulByElement(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr,
+ "'Vd.'?30:84h, 'Vn.'?30:84h, "
+ "'Ve.h['IVByElemIndex]");
+}
+
+void Disassembler::DisassembleNEONFPMulByElementLong(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr,
+ "'Vd.'?30:42s, 'Vn.'?30:42h, "
+ "'Ve.h['IVByElemIndexFHM]");
+}
+
+void Disassembler::DisassembleNEONComplexMulByElement(
+ const Instruction *instr) {
+ const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndexRot], #'u1413*90";
+ // TODO: Disallow undefined element types for this instruction.
+ static const NEONFormatMap map_cn =
+ {{23, 22, 30},
+ {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_UNDEF, NF_4S, NF_UNDEF, NF_UNDEF}};
+ NEONFormatDecoder nfd(instr,
+ &map_cn,
+ &map_cn,
+ NEONFormatDecoder::ScalarFormatMap());
+ Format(instr, mnemonic_.c_str(), nfd.Substitute(form));
+}
void Disassembler::VisitNEONCopy(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "(NEONCopy)";
NEONFormatDecoder nfd(instr,
NEONFormatDecoder::TriangularFormatMap(),
NEONFormatDecoder::TriangularScalarFormatMap());
- if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
- mnemonic = "mov";
- nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
- form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]";
- } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
- mnemonic = "mov";
- nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
- if (nfd.GetVectorFormat() == kFormatD) {
- form = "'Vd.%s['IVInsIndex1], 'Xn";
- } else {
- form = "'Vd.%s['IVInsIndex1], 'Wn";
- }
- } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
- if (instr->Mask(NEON_Q) || ((instr->GetImmNEON5() & 7) == 4)) {
+ switch (form_hash_) {
+ case "ins_asimdins_iv_v"_h:
mnemonic = "mov";
- } else {
- mnemonic = "umov";
- }
- nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
- if (nfd.GetVectorFormat() == kFormatD) {
- form = "'Xd, 'Vn.%s['IVInsIndex1]";
- } else {
- form = "'Wd, 'Vn.%s['IVInsIndex1]";
- }
- } else if (instr->Mask(NEONCopySmovMask) == NEON_SMOV) {
- mnemonic = "smov";
- nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
- form = "'R30d, 'Vn.%s['IVInsIndex1]";
- } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
- mnemonic = "dup";
- form = "'Vd.%s, 'Vn.%s['IVInsIndex1]";
- } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
- mnemonic = "dup";
- if (nfd.GetVectorFormat() == kFormat2D) {
- form = "'Vd.%s, 'Xn";
- } else {
- form = "'Vd.%s, 'Wn";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]";
+ break;
+ case "ins_asimdins_ir_r"_h:
+ mnemonic = "mov";
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ if (nfd.GetVectorFormat() == kFormatD) {
+ form = "'Vd.%s['IVInsIndex1], 'Xn";
+ } else {
+ form = "'Vd.%s['IVInsIndex1], 'Wn";
+ }
+ break;
+ case "umov_asimdins_w_w"_h:
+ case "umov_asimdins_x_x"_h:
+ if (instr->Mask(NEON_Q) || ((instr->GetImmNEON5() & 7) == 4)) {
+ mnemonic = "mov";
+ }
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ if (nfd.GetVectorFormat() == kFormatD) {
+ form = "'Xd, 'Vn.%s['IVInsIndex1]";
+ } else {
+ form = "'Wd, 'Vn.%s['IVInsIndex1]";
+ }
+ break;
+ case "smov_asimdins_w_w"_h:
+ case "smov_asimdins_x_x"_h: {
+ nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap());
+ VectorFormat vform = nfd.GetVectorFormat();
+ if ((vform == kFormatD) ||
+ ((vform == kFormatS) && (instr->ExtractBit(30) == 0))) {
+ mnemonic = NULL;
+ }
+ form = "'R30d, 'Vn.%s['IVInsIndex1]";
+ break;
}
+ case "dup_asimdins_dv_v"_h:
+ form = "'Vd.%s, 'Vn.%s['IVInsIndex1]";
+ break;
+ case "dup_asimdins_dr_r"_h:
+ if (nfd.GetVectorFormat() == kFormat2D) {
+ form = "'Vd.%s, 'Xn";
+ } else {
+ form = "'Vd.%s, 'Wn";
+ }
}
Format(instr, mnemonic, nfd.Substitute(form));
}
void Disassembler::VisitNEONExtract(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(NEONExtract)";
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract";
NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
- if (instr->Mask(NEONExtractMask) == NEON_EXT) {
- mnemonic = "ext";
- form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract";
+ if ((instr->GetImmNEONExt() > 7) && (instr->GetNEONQ() == 0)) {
+ mnemonic = NULL;
}
Format(instr, mnemonic, nfd.Substitute(form));
}
@@ -3961,532 +3033,217 @@ void Disassembler::VisitNEONLoadStoreSingleStructPostIndex(
void Disassembler::VisitNEONModifiedImmediate(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Vt.%s, 'IVMIImm8, lsl 'IVMIShiftAmt1";
- int half_enc = instr->ExtractBit(11);
- int cmode = instr->GetNEONCmode();
- int cmode_3 = (cmode >> 3) & 1;
- int cmode_2 = (cmode >> 2) & 1;
- int cmode_1 = (cmode >> 1) & 1;
- int cmode_0 = cmode & 1;
- int q = instr->GetNEONQ();
- int op = instr->GetNEONModImmOp();
-
- static const NEONFormatMap map_b = {{30}, {NF_8B, NF_16B}};
static const NEONFormatMap map_h = {{30}, {NF_4H, NF_8H}};
static const NEONFormatMap map_s = {{30}, {NF_2S, NF_4S}};
- NEONFormatDecoder nfd(instr, &map_b);
- if (cmode_3 == 0) {
- if (cmode_0 == 0) {
- mnemonic = (op == 1) ? "mvni" : "movi";
- } else { // cmode<0> == '1'.
- mnemonic = (op == 1) ? "bic" : "orr";
- }
- nfd.SetFormatMap(0, &map_s);
- } else { // cmode<3> == '1'.
- if (cmode_2 == 0) {
- if (cmode_0 == 0) {
- mnemonic = (op == 1) ? "mvni" : "movi";
- } else { // cmode<0> == '1'.
- mnemonic = (op == 1) ? "bic" : "orr";
- }
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+
+ switch (form_hash_) {
+ case "movi_asimdimm_n_b"_h:
+ form = "'Vt.%s, 'IVMIImm8";
+ break;
+ case "bic_asimdimm_l_hl"_h:
+ case "movi_asimdimm_l_hl"_h:
+ case "mvni_asimdimm_l_hl"_h:
+ case "orr_asimdimm_l_hl"_h:
nfd.SetFormatMap(0, &map_h);
- } else { // cmode<2> == '1'.
- if (cmode_1 == 0) {
- mnemonic = (op == 1) ? "mvni" : "movi";
- form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2";
- nfd.SetFormatMap(0, &map_s);
- } else { // cmode<1> == '1'.
- if (cmode_0 == 0) {
- mnemonic = "movi";
- if (op == 0) {
- form = "'Vt.%s, 'IVMIImm8";
- } else {
- form = (q == 0) ? "'Dd, 'IVMIImm" : "'Vt.2d, 'IVMIImm";
- }
- } else { // cmode<0> == '1'
- mnemonic = "fmov";
- form = "'Vt.%s, 'IFPNeon";
- if (half_enc == 1) {
- nfd.SetFormatMap(0, &map_h);
- } else if (op == 0) {
- nfd.SetFormatMap(0, &map_s);
- } else if (q == 1) {
- form = "'Vt.2d, 'IFPNeon";
- } else {
- mnemonic = "unallocated";
- form = "(NEONModifiedImmediate)";
- }
- }
- }
- }
+ break;
+ case "movi_asimdimm_m_sm"_h:
+ case "mvni_asimdimm_m_sm"_h:
+ form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2";
+ VIXL_FALLTHROUGH();
+ case "bic_asimdimm_l_sl"_h:
+ case "movi_asimdimm_l_sl"_h:
+ case "mvni_asimdimm_l_sl"_h:
+ case "orr_asimdimm_l_sl"_h:
+ nfd.SetFormatMap(0, &map_s);
+ break;
+ case "movi_asimdimm_d_ds"_h:
+ form = "'Dd, 'IVMIImm";
+ break;
+ case "movi_asimdimm_d2_d"_h:
+ form = "'Vt.2d, 'IVMIImm";
+ break;
+ case "fmov_asimdimm_h_h"_h:
+ form = "'Vt.%s, 'IFPNeon";
+ nfd.SetFormatMap(0, &map_h);
+ break;
+ case "fmov_asimdimm_s_s"_h:
+ form = "'Vt.%s, 'IFPNeon";
+ nfd.SetFormatMap(0, &map_s);
+ break;
+ case "fmov_asimdimm_d2_d"_h:
+ form = "'Vt.2d, 'IFPNeon";
+ break;
}
+
Format(instr, mnemonic, nfd.Substitute(form));
}
+void Disassembler::DisassembleNEONScalar2RegMiscOnlyD(
+ const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Dd, 'Dn";
+ const char *suffix = ", #0";
+ if (instr->GetNEONSize() != 3) {
+ mnemonic = NULL;
+ }
+ switch (form_hash_) {
+ case "abs_asisdmisc_r"_h:
+ case "neg_asisdmisc_r"_h:
+ suffix = NULL;
+ }
+ Format(instr, mnemonic, form, suffix);
+}
-void Disassembler::VisitNEONScalar2RegMisc(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+void Disassembler::DisassembleNEONFPScalar2RegMisc(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "%sd, %sn";
- const char *form_0 = "%sd, %sn, #0";
- const char *form_fp0 = "%sd, %sn, #0.0";
+ const char *suffix = NULL;
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap());
+ switch (form_hash_) {
+ case "fcmeq_asisdmisc_fz"_h:
+ case "fcmge_asisdmisc_fz"_h:
+ case "fcmgt_asisdmisc_fz"_h:
+ case "fcmle_asisdmisc_fz"_h:
+ case "fcmlt_asisdmisc_fz"_h:
+ suffix = ", #0.0";
+ break;
+ case "fcvtxn_asisdmisc_n"_h:
+ if (nfd.GetVectorFormat(0) == kFormatS) { // Source format.
+ mnemonic = NULL;
+ }
+ form = "'Sd, 'Dn";
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form), suffix);
+}
+void Disassembler::VisitNEONScalar2RegMisc(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "%sd, %sn";
NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
-
- if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
- // These instructions all use a two bit size field, except NOT and RBIT,
- // which use the field to encode the operation.
- switch (instr->Mask(NEONScalar2RegMiscMask)) {
- case NEON_CMGT_zero_scalar:
- mnemonic = "cmgt";
- form = form_0;
- break;
- case NEON_CMGE_zero_scalar:
- mnemonic = "cmge";
- form = form_0;
- break;
- case NEON_CMLE_zero_scalar:
- mnemonic = "cmle";
- form = form_0;
- break;
- case NEON_CMLT_zero_scalar:
- mnemonic = "cmlt";
- form = form_0;
- break;
- case NEON_CMEQ_zero_scalar:
- mnemonic = "cmeq";
- form = form_0;
- break;
- case NEON_NEG_scalar:
- mnemonic = "neg";
- break;
- case NEON_SQNEG_scalar:
- mnemonic = "sqneg";
- break;
- case NEON_ABS_scalar:
- mnemonic = "abs";
- break;
- case NEON_SQABS_scalar:
- mnemonic = "sqabs";
- break;
- case NEON_SUQADD_scalar:
- mnemonic = "suqadd";
- break;
- case NEON_USQADD_scalar:
- mnemonic = "usqadd";
- break;
- default:
- form = "(NEONScalar2RegMisc)";
- }
- } else {
- // These instructions all use a one bit size field, except SQXTUN, SQXTN
- // and UQXTN, which use a two bit size field.
- nfd.SetFormatMaps(nfd.FPScalarFormatMap());
- switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
- case NEON_FRSQRTE_scalar:
- mnemonic = "frsqrte";
- break;
- case NEON_FRECPE_scalar:
- mnemonic = "frecpe";
- break;
- case NEON_SCVTF_scalar:
- mnemonic = "scvtf";
- break;
- case NEON_UCVTF_scalar:
- mnemonic = "ucvtf";
- break;
- case NEON_FCMGT_zero_scalar:
- mnemonic = "fcmgt";
- form = form_fp0;
- break;
- case NEON_FCMGE_zero_scalar:
- mnemonic = "fcmge";
- form = form_fp0;
- break;
- case NEON_FCMLE_zero_scalar:
- mnemonic = "fcmle";
- form = form_fp0;
- break;
- case NEON_FCMLT_zero_scalar:
- mnemonic = "fcmlt";
- form = form_fp0;
- break;
- case NEON_FCMEQ_zero_scalar:
- mnemonic = "fcmeq";
- form = form_fp0;
- break;
- case NEON_FRECPX_scalar:
- mnemonic = "frecpx";
- break;
- case NEON_FCVTNS_scalar:
- mnemonic = "fcvtns";
- break;
- case NEON_FCVTNU_scalar:
- mnemonic = "fcvtnu";
- break;
- case NEON_FCVTPS_scalar:
- mnemonic = "fcvtps";
- break;
- case NEON_FCVTPU_scalar:
- mnemonic = "fcvtpu";
- break;
- case NEON_FCVTMS_scalar:
- mnemonic = "fcvtms";
- break;
- case NEON_FCVTMU_scalar:
- mnemonic = "fcvtmu";
- break;
- case NEON_FCVTZS_scalar:
- mnemonic = "fcvtzs";
- break;
- case NEON_FCVTZU_scalar:
- mnemonic = "fcvtzu";
- break;
- case NEON_FCVTAS_scalar:
- mnemonic = "fcvtas";
- break;
- case NEON_FCVTAU_scalar:
- mnemonic = "fcvtau";
- break;
- case NEON_FCVTXN_scalar:
- nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
- mnemonic = "fcvtxn";
- break;
- default:
- nfd.SetFormatMap(0, nfd.ScalarFormatMap());
- nfd.SetFormatMap(1, nfd.LongScalarFormatMap());
- switch (instr->Mask(NEONScalar2RegMiscMask)) {
- case NEON_SQXTN_scalar:
- mnemonic = "sqxtn";
- break;
- case NEON_UQXTN_scalar:
- mnemonic = "uqxtn";
- break;
- case NEON_SQXTUN_scalar:
- mnemonic = "sqxtun";
- break;
- default:
- form = "(NEONScalar2RegMisc)";
- }
- }
+ switch (form_hash_) {
+ case "sqxtn_asisdmisc_n"_h:
+ case "sqxtun_asisdmisc_n"_h:
+ case "uqxtn_asisdmisc_n"_h:
+ nfd.SetFormatMap(1, nfd.LongScalarFormatMap());
}
Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
}
void Disassembler::VisitNEONScalar2RegMiscFP16(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Hd, 'Hn";
- const char *form_fp0 = "'Hd, 'Hn, #0.0";
-
- switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
-#define FORMAT(A, B) \
- case NEON_##A##_H_scalar: \
- mnemonic = B; \
- break;
- // clang-format off
- FORMAT(FCVTNS, "fcvtns")
- FORMAT(FCVTMS, "fcvtms")
- FORMAT(FCVTAS, "fcvtas")
- FORMAT(SCVTF, "scvtf")
- FORMAT(FCVTPS, "fcvtps")
- FORMAT(FCVTZS, "fcvtzs")
- FORMAT(FRECPE, "frecpe")
- FORMAT(FRECPX, "frecpx")
- FORMAT(FCVTNU, "fcvtnu")
- FORMAT(FCVTMU, "fcvtmu")
- FORMAT(FCVTAU, "fcvtau")
- FORMAT(UCVTF, "ucvtf")
- FORMAT(FCVTPU, "fcvtpu")
- FORMAT(FCVTZU, "fcvtzu")
- FORMAT(FRSQRTE, "frsqrte")
-// clang-format on
-#undef FORMAT
-#define FORMAT(A, B) \
- case NEON_##A##_H_zero_scalar: \
- mnemonic = B; \
- form = form_fp0; \
- break;
- FORMAT(FCMGT, "fcmgt")
- FORMAT(FCMEQ, "fcmeq")
- FORMAT(FCMLT, "fcmlt")
- FORMAT(FCMGE, "fcmge")
- FORMAT(FCMLE, "fcmle")
-#undef FORMAT
+ const char *suffix = NULL;
- default:
- VIXL_UNREACHABLE();
+ switch (form_hash_) {
+ case "fcmeq_asisdmiscfp16_fz"_h:
+ case "fcmge_asisdmiscfp16_fz"_h:
+ case "fcmgt_asisdmiscfp16_fz"_h:
+ case "fcmle_asisdmiscfp16_fz"_h:
+ case "fcmlt_asisdmiscfp16_fz"_h:
+ suffix = ", #0.0";
}
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic, form, suffix);
}
void Disassembler::VisitNEONScalar3Diff(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "%sd, %sn, %sm";
NEONFormatDecoder nfd(instr,
NEONFormatDecoder::LongScalarFormatMap(),
NEONFormatDecoder::ScalarFormatMap());
-
- switch (instr->Mask(NEONScalar3DiffMask)) {
- case NEON_SQDMLAL_scalar:
- mnemonic = "sqdmlal";
- break;
- case NEON_SQDMLSL_scalar:
- mnemonic = "sqdmlsl";
- break;
- case NEON_SQDMULL_scalar:
- mnemonic = "sqdmull";
- break;
- default:
- form = "(NEONScalar3Diff)";
+ if (nfd.GetVectorFormat(0) == kFormatH) {
+ mnemonic = NULL;
}
Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
}
+void Disassembler::DisassembleNEONFPScalar3Same(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "%sd, %sn, %sm";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap());
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+}
+
+void Disassembler::DisassembleNEONScalar3SameOnlyD(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Dd, 'Dn, 'Dm";
+ if (instr->GetNEONSize() != 3) {
+ mnemonic = NULL;
+ }
+ Format(instr, mnemonic, form);
+}
void Disassembler::VisitNEONScalar3Same(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "%sd, %sn, %sm";
NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
-
- if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
- nfd.SetFormatMaps(nfd.FPScalarFormatMap());
- switch (instr->Mask(NEONScalar3SameFPMask)) {
- case NEON_FACGE_scalar:
- mnemonic = "facge";
- break;
- case NEON_FACGT_scalar:
- mnemonic = "facgt";
- break;
- case NEON_FCMEQ_scalar:
- mnemonic = "fcmeq";
- break;
- case NEON_FCMGE_scalar:
- mnemonic = "fcmge";
- break;
- case NEON_FCMGT_scalar:
- mnemonic = "fcmgt";
- break;
- case NEON_FMULX_scalar:
- mnemonic = "fmulx";
- break;
- case NEON_FRECPS_scalar:
- mnemonic = "frecps";
- break;
- case NEON_FRSQRTS_scalar:
- mnemonic = "frsqrts";
- break;
- case NEON_FABD_scalar:
- mnemonic = "fabd";
- break;
- default:
- form = "(NEONScalar3Same)";
- }
- } else {
- switch (instr->Mask(NEONScalar3SameMask)) {
- case NEON_ADD_scalar:
- mnemonic = "add";
- break;
- case NEON_SUB_scalar:
- mnemonic = "sub";
- break;
- case NEON_CMEQ_scalar:
- mnemonic = "cmeq";
- break;
- case NEON_CMGE_scalar:
- mnemonic = "cmge";
- break;
- case NEON_CMGT_scalar:
- mnemonic = "cmgt";
- break;
- case NEON_CMHI_scalar:
- mnemonic = "cmhi";
- break;
- case NEON_CMHS_scalar:
- mnemonic = "cmhs";
- break;
- case NEON_CMTST_scalar:
- mnemonic = "cmtst";
- break;
- case NEON_UQADD_scalar:
- mnemonic = "uqadd";
- break;
- case NEON_SQADD_scalar:
- mnemonic = "sqadd";
- break;
- case NEON_UQSUB_scalar:
- mnemonic = "uqsub";
- break;
- case NEON_SQSUB_scalar:
- mnemonic = "sqsub";
- break;
- case NEON_USHL_scalar:
- mnemonic = "ushl";
- break;
- case NEON_SSHL_scalar:
- mnemonic = "sshl";
- break;
- case NEON_UQSHL_scalar:
- mnemonic = "uqshl";
- break;
- case NEON_SQSHL_scalar:
- mnemonic = "sqshl";
- break;
- case NEON_URSHL_scalar:
- mnemonic = "urshl";
- break;
- case NEON_SRSHL_scalar:
- mnemonic = "srshl";
- break;
- case NEON_UQRSHL_scalar:
- mnemonic = "uqrshl";
- break;
- case NEON_SQRSHL_scalar:
- mnemonic = "sqrshl";
- break;
- case NEON_SQDMULH_scalar:
- mnemonic = "sqdmulh";
- break;
- case NEON_SQRDMULH_scalar:
- mnemonic = "sqrdmulh";
- break;
- default:
- form = "(NEONScalar3Same)";
- }
+ VectorFormat vform = nfd.GetVectorFormat(0);
+ switch (form_hash_) {
+ case "srshl_asisdsame_only"_h:
+ case "urshl_asisdsame_only"_h:
+ case "sshl_asisdsame_only"_h:
+ case "ushl_asisdsame_only"_h:
+ if (vform != kFormatD) {
+ mnemonic = NULL;
+ }
+ break;
+ case "sqdmulh_asisdsame_only"_h:
+ case "sqrdmulh_asisdsame_only"_h:
+ if ((vform == kFormatB) || (vform == kFormatD)) {
+ mnemonic = NULL;
+ }
}
Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
}
void Disassembler::VisitNEONScalar3SameFP16(const Instruction *instr) {
- const char *mnemonic = NULL;
- const char *form = "'Hd, 'Hn, 'Hm";
-
- switch (instr->Mask(NEONScalar3SameFP16Mask)) {
- case NEON_FABD_H_scalar:
- mnemonic = "fabd";
- break;
- case NEON_FMULX_H_scalar:
- mnemonic = "fmulx";
- break;
- case NEON_FCMEQ_H_scalar:
- mnemonic = "fcmeq";
- break;
- case NEON_FCMGE_H_scalar:
- mnemonic = "fcmge";
- break;
- case NEON_FCMGT_H_scalar:
- mnemonic = "fcmgt";
- break;
- case NEON_FACGE_H_scalar:
- mnemonic = "facge";
- break;
- case NEON_FACGT_H_scalar:
- mnemonic = "facgt";
- break;
- case NEON_FRECPS_H_scalar:
- mnemonic = "frecps";
- break;
- case NEON_FRSQRTS_H_scalar:
- mnemonic = "frsqrts";
- break;
- default:
- VIXL_UNREACHABLE();
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Hd, 'Hn, 'Hm");
}
void Disassembler::VisitNEONScalar3SameExtra(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "%sd, %sn, %sm";
- NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
+ USE(instr);
+ // Nothing to do - handled by VisitNEONScalar3Same.
+ VIXL_UNREACHABLE();
+}
- switch (instr->Mask(NEONScalar3SameExtraMask)) {
- case NEON_SQRDMLAH_scalar:
- mnemonic = "sqrdmlah";
- break;
- case NEON_SQRDMLSH_scalar:
- mnemonic = "sqrdmlsh";
- break;
- default:
- form = "(NEONScalar3SameExtra)";
+void Disassembler::DisassembleNEONScalarSatMulLongIndex(
+ const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+ NEONFormatDecoder nfd(instr,
+ NEONFormatDecoder::LongScalarFormatMap(),
+ NEONFormatDecoder::ScalarFormatMap());
+ if (nfd.GetVectorFormat(0) == kFormatH) {
+ mnemonic = NULL;
}
- Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
+ Format(instr,
+ mnemonic,
+ nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat));
}
+void Disassembler::DisassembleNEONFPScalarMulIndex(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+ static const NEONFormatMap map = {{23, 22}, {NF_H, NF_UNDEF, NF_S, NF_D}};
+ NEONFormatDecoder nfd(instr, &map);
+ Format(instr,
+ mnemonic,
+ nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat));
+}
void Disassembler::VisitNEONScalarByIndexedElement(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
- const char *form_half = "'Hd, 'Hn, 'Ve.h['IVByElemIndex]";
NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
- bool long_instr = false;
-
- switch (instr->Mask(NEONScalarByIndexedElementMask)) {
- case NEON_SQDMULL_byelement_scalar:
- mnemonic = "sqdmull";
- long_instr = true;
- break;
- case NEON_SQDMLAL_byelement_scalar:
- mnemonic = "sqdmlal";
- long_instr = true;
- break;
- case NEON_SQDMLSL_byelement_scalar:
- mnemonic = "sqdmlsl";
- long_instr = true;
- break;
- case NEON_SQDMULH_byelement_scalar:
- mnemonic = "sqdmulh";
- break;
- case NEON_SQRDMULH_byelement_scalar:
- mnemonic = "sqrdmulh";
- break;
- case NEON_SQRDMLAH_byelement_scalar:
- mnemonic = "sqrdmlah";
- break;
- case NEON_SQRDMLSH_byelement_scalar:
- mnemonic = "sqrdmlsh";
- break;
- default:
- nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
- switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
- case NEON_FMUL_byelement_scalar:
- mnemonic = "fmul";
- break;
- case NEON_FMLA_byelement_scalar:
- mnemonic = "fmla";
- break;
- case NEON_FMLS_byelement_scalar:
- mnemonic = "fmls";
- break;
- case NEON_FMULX_byelement_scalar:
- mnemonic = "fmulx";
- break;
- case NEON_FMLA_H_byelement_scalar:
- mnemonic = "fmla";
- form = form_half;
- break;
- case NEON_FMLS_H_byelement_scalar:
- mnemonic = "fmls";
- form = form_half;
- break;
- case NEON_FMUL_H_byelement_scalar:
- mnemonic = "fmul";
- form = form_half;
- break;
- case NEON_FMULX_H_byelement_scalar:
- mnemonic = "fmulx";
- form = form_half;
- break;
- default:
- form = "(NEONScalarByIndexedElement)";
- }
+ VectorFormat vform_dst = nfd.GetVectorFormat(0);
+ if ((vform_dst == kFormatB) || (vform_dst == kFormatD)) {
+ mnemonic = NULL;
}
-
- if (long_instr) {
- nfd.SetFormatMap(0, nfd.LongScalarFormatMap());
- }
-
Format(instr,
mnemonic,
nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat));
@@ -4509,379 +3266,174 @@ void Disassembler::VisitNEONScalarCopy(const Instruction *instr) {
void Disassembler::VisitNEONScalarPairwise(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "%sd, 'Vn.%s";
- NEONFormatMap map = {{22}, {NF_2S, NF_2D}};
- NEONFormatDecoder nfd(instr,
- NEONFormatDecoder::FPScalarPairwiseFormatMap(),
- &map);
-
- switch (instr->Mask(NEONScalarPairwiseMask)) {
- case NEON_ADDP_scalar:
- // All pairwise operations except ADDP use bit U to differentiate FP16
- // from FP32/FP64 variations.
- nfd.SetFormatMap(0, NEONFormatDecoder::FPScalarFormatMap());
- mnemonic = "addp";
- break;
- case NEON_FADDP_h_scalar:
- form = "%sd, 'Vn.2h";
- VIXL_FALLTHROUGH();
- case NEON_FADDP_scalar:
- mnemonic = "faddp";
- break;
- case NEON_FMAXP_h_scalar:
- form = "%sd, 'Vn.2h";
- VIXL_FALLTHROUGH();
- case NEON_FMAXP_scalar:
- mnemonic = "fmaxp";
- break;
- case NEON_FMAXNMP_h_scalar:
- form = "%sd, 'Vn.2h";
- VIXL_FALLTHROUGH();
- case NEON_FMAXNMP_scalar:
- mnemonic = "fmaxnmp";
- break;
- case NEON_FMINP_h_scalar:
- form = "%sd, 'Vn.2h";
- VIXL_FALLTHROUGH();
- case NEON_FMINP_scalar:
- mnemonic = "fminp";
- break;
- case NEON_FMINNMP_h_scalar:
- form = "%sd, 'Vn.2h";
- VIXL_FALLTHROUGH();
- case NEON_FMINNMP_scalar:
- mnemonic = "fminnmp";
- break;
- default:
- form = "(NEONScalarPairwise)";
+ const char *mnemonic = mnemonic_.c_str();
+ if (form_hash_ == "addp_asisdpair_only"_h) {
+ // All pairwise operations except ADDP use bit U to differentiate FP16
+ // from FP32/FP64 variations.
+ if (instr->GetNEONSize() != 3) {
+ mnemonic = NULL;
+ }
+ Format(instr, mnemonic, "'Dd, 'Vn.2d");
+ } else {
+ const char *form = "%sd, 'Vn.2%s";
+ NEONFormatDecoder nfd(instr,
+ NEONFormatDecoder::FPScalarPairwiseFormatMap());
+
+ Format(instr,
+ mnemonic,
+ nfd.Substitute(form,
+ NEONFormatDecoder::kPlaceholder,
+ NEONFormatDecoder::kFormat));
}
- Format(instr,
- mnemonic,
- nfd.Substitute(form,
- NEONFormatDecoder::kPlaceholder,
- NEONFormatDecoder::kFormat));
}
+void Disassembler::DisassembleNEONScalarShiftImmOnlyD(
+ const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Dd, 'Dn, ";
+ const char *suffix = "'IsR";
-void Disassembler::VisitNEONScalarShiftImmediate(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "%sd, %sn, 'Is1";
- const char *form_2 = "%sd, %sn, 'Is2";
-
- static const NEONFormatMap map_shift = {{22, 21, 20, 19},
- {NF_UNDEF,
- NF_B,
- NF_H,
- NF_H,
- NF_S,
- NF_S,
- NF_S,
- NF_S,
- NF_D,
- NF_D,
- NF_D,
- NF_D,
- NF_D,
- NF_D,
- NF_D,
- NF_D}};
- static const NEONFormatMap map_shift_narrow =
- {{21, 20, 19}, {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}};
- NEONFormatDecoder nfd(instr, &map_shift);
-
- if (instr->GetImmNEONImmh()) { // immh has to be non-zero.
- switch (instr->Mask(NEONScalarShiftImmediateMask)) {
- case NEON_FCVTZU_imm_scalar:
- mnemonic = "fcvtzu";
- break;
- case NEON_FCVTZS_imm_scalar:
- mnemonic = "fcvtzs";
- break;
- case NEON_SCVTF_imm_scalar:
- mnemonic = "scvtf";
- break;
- case NEON_UCVTF_imm_scalar:
- mnemonic = "ucvtf";
- break;
- case NEON_SRI_scalar:
- mnemonic = "sri";
- break;
- case NEON_SSHR_scalar:
- mnemonic = "sshr";
- break;
- case NEON_USHR_scalar:
- mnemonic = "ushr";
- break;
- case NEON_SRSHR_scalar:
- mnemonic = "srshr";
- break;
- case NEON_URSHR_scalar:
- mnemonic = "urshr";
- break;
- case NEON_SSRA_scalar:
- mnemonic = "ssra";
- break;
- case NEON_USRA_scalar:
- mnemonic = "usra";
- break;
- case NEON_SRSRA_scalar:
- mnemonic = "srsra";
- break;
- case NEON_URSRA_scalar:
- mnemonic = "ursra";
- break;
- case NEON_SHL_scalar:
- mnemonic = "shl";
- form = form_2;
- break;
- case NEON_SLI_scalar:
- mnemonic = "sli";
- form = form_2;
- break;
- case NEON_SQSHLU_scalar:
- mnemonic = "sqshlu";
- form = form_2;
- break;
- case NEON_SQSHL_imm_scalar:
- mnemonic = "sqshl";
- form = form_2;
- break;
- case NEON_UQSHL_imm_scalar:
- mnemonic = "uqshl";
- form = form_2;
- break;
- case NEON_UQSHRN_scalar:
- mnemonic = "uqshrn";
- nfd.SetFormatMap(1, &map_shift_narrow);
- break;
- case NEON_UQRSHRN_scalar:
- mnemonic = "uqrshrn";
- nfd.SetFormatMap(1, &map_shift_narrow);
- break;
- case NEON_SQSHRN_scalar:
- mnemonic = "sqshrn";
- nfd.SetFormatMap(1, &map_shift_narrow);
- break;
- case NEON_SQRSHRN_scalar:
- mnemonic = "sqrshrn";
- nfd.SetFormatMap(1, &map_shift_narrow);
- break;
- case NEON_SQSHRUN_scalar:
- mnemonic = "sqshrun";
- nfd.SetFormatMap(1, &map_shift_narrow);
- break;
- case NEON_SQRSHRUN_scalar:
- mnemonic = "sqrshrun";
- nfd.SetFormatMap(1, &map_shift_narrow);
- break;
- default:
- form = "(NEONScalarShiftImmediate)";
- }
- } else {
- form = "(NEONScalarShiftImmediate)";
+ if (instr->ExtractBit(22) == 0) {
+ // Only D registers are supported.
+ mnemonic = NULL;
}
+
+ switch (form_hash_) {
+ case "shl_asisdshf_r"_h:
+ case "sli_asisdshf_r"_h:
+ suffix = "'IsL";
+ }
+
+ Format(instr, mnemonic, form, suffix);
+}
+
+void Disassembler::DisassembleNEONScalarShiftRightNarrowImm(
+ const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "%sd, %sn, 'IsR";
+ static const NEONFormatMap map_dst =
+ {{22, 21, 20, 19}, {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S}};
+ static const NEONFormatMap map_src =
+ {{22, 21, 20, 19}, {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}};
+ NEONFormatDecoder nfd(instr, &map_dst, &map_src);
Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
}
+void Disassembler::VisitNEONScalarShiftImmediate(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "%sd, %sn, ";
+ const char *suffix = "'IsR";
+
+ // clang-format off
+ static const NEONFormatMap map = {{22, 21, 20, 19},
+ {NF_UNDEF, NF_B, NF_H, NF_H,
+ NF_S, NF_S, NF_S, NF_S,
+ NF_D, NF_D, NF_D, NF_D,
+ NF_D, NF_D, NF_D, NF_D}};
+ // clang-format on
+ NEONFormatDecoder nfd(instr, &map);
+ switch (form_hash_) {
+ case "sqshlu_asisdshf_r"_h:
+ case "sqshl_asisdshf_r"_h:
+ case "uqshl_asisdshf_r"_h:
+ suffix = "'IsL";
+ break;
+ default:
+ if (nfd.GetVectorFormat(0) == kFormatB) {
+ mnemonic = NULL;
+ }
+ }
+ Format(instr, mnemonic, nfd.SubstitutePlaceholders(form), suffix);
+}
-void Disassembler::VisitNEONShiftImmediate(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Vd.%s, 'Vn.%s, 'Is1";
- const char *form_shift_2 = "'Vd.%s, 'Vn.%s, 'Is2";
- const char *form_xtl = "'Vd.%s, 'Vn.%s";
-
- // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
- static const NEONFormatMap map_shift_ta =
- {{22, 21, 20, 19},
- {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
-
- // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
- // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
- static const NEONFormatMap map_shift_tb =
- {{22, 21, 20, 19, 30},
- {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_4H,
- NF_8H, NF_2S, NF_4S, NF_2S, NF_4S, NF_2S, NF_4S,
- NF_2S, NF_4S, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF,
- NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
- NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}};
-
- NEONFormatDecoder nfd(instr, &map_shift_tb);
-
- if (instr->GetImmNEONImmh()) { // immh has to be non-zero.
- switch (instr->Mask(NEONShiftImmediateMask)) {
- case NEON_SQSHLU:
- mnemonic = "sqshlu";
- form = form_shift_2;
- break;
- case NEON_SQSHL_imm:
- mnemonic = "sqshl";
- form = form_shift_2;
- break;
- case NEON_UQSHL_imm:
- mnemonic = "uqshl";
- form = form_shift_2;
- break;
- case NEON_SHL:
- mnemonic = "shl";
- form = form_shift_2;
- break;
- case NEON_SLI:
- mnemonic = "sli";
- form = form_shift_2;
- break;
- case NEON_SCVTF_imm:
- mnemonic = "scvtf";
- break;
- case NEON_UCVTF_imm:
- mnemonic = "ucvtf";
- break;
- case NEON_FCVTZU_imm:
- mnemonic = "fcvtzu";
- break;
- case NEON_FCVTZS_imm:
- mnemonic = "fcvtzs";
- break;
- case NEON_SRI:
- mnemonic = "sri";
- break;
- case NEON_SSHR:
- mnemonic = "sshr";
- break;
- case NEON_USHR:
- mnemonic = "ushr";
- break;
- case NEON_SRSHR:
- mnemonic = "srshr";
- break;
- case NEON_URSHR:
- mnemonic = "urshr";
- break;
- case NEON_SSRA:
- mnemonic = "ssra";
- break;
- case NEON_USRA:
- mnemonic = "usra";
- break;
- case NEON_SRSRA:
- mnemonic = "srsra";
- break;
- case NEON_URSRA:
- mnemonic = "ursra";
- break;
- case NEON_SHRN:
- mnemonic = instr->Mask(NEON_Q) ? "shrn2" : "shrn";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_RSHRN:
- mnemonic = instr->Mask(NEON_Q) ? "rshrn2" : "rshrn";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_UQSHRN:
- mnemonic = instr->Mask(NEON_Q) ? "uqshrn2" : "uqshrn";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_UQRSHRN:
- mnemonic = instr->Mask(NEON_Q) ? "uqrshrn2" : "uqrshrn";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_SQSHRN:
- mnemonic = instr->Mask(NEON_Q) ? "sqshrn2" : "sqshrn";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_SQRSHRN:
- mnemonic = instr->Mask(NEON_Q) ? "sqrshrn2" : "sqrshrn";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_SQSHRUN:
- mnemonic = instr->Mask(NEON_Q) ? "sqshrun2" : "sqshrun";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_SQRSHRUN:
- mnemonic = instr->Mask(NEON_Q) ? "sqrshrun2" : "sqrshrun";
- nfd.SetFormatMap(1, &map_shift_ta);
- break;
- case NEON_SSHLL:
- nfd.SetFormatMap(0, &map_shift_ta);
- if (instr->GetImmNEONImmb() == 0 &&
- CountSetBits(instr->GetImmNEONImmh(), 32) == 1) { // sxtl variant.
- form = form_xtl;
- mnemonic = instr->Mask(NEON_Q) ? "sxtl2" : "sxtl";
- } else { // sshll variant.
- form = form_shift_2;
- mnemonic = instr->Mask(NEON_Q) ? "sshll2" : "sshll";
- }
- break;
- case NEON_USHLL:
- nfd.SetFormatMap(0, &map_shift_ta);
- if (instr->GetImmNEONImmb() == 0 &&
- CountSetBits(instr->GetImmNEONImmh(), 32) == 1) { // uxtl variant.
- form = form_xtl;
- mnemonic = instr->Mask(NEON_Q) ? "uxtl2" : "uxtl";
- } else { // ushll variant.
- form = form_shift_2;
- mnemonic = instr->Mask(NEON_Q) ? "ushll2" : "ushll";
+void Disassembler::DisassembleNEONShiftLeftLongImm(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s";
+ const char *suffix = ", 'IsL";
+
+ NEONFormatDecoder nfd(instr,
+ NEONFormatDecoder::ShiftLongNarrowImmFormatMap(),
+ NEONFormatDecoder::ShiftImmFormatMap());
+
+ if (instr->GetImmNEONImmb() == 0 &&
+ CountSetBits(instr->GetImmNEONImmh(), 32) == 1) { // xtl variant.
+ VIXL_ASSERT((form_hash_ == "sshll_asimdshf_l"_h) ||
+ (form_hash_ == "ushll_asimdshf_l"_h));
+ mnemonic = (form_hash_ == "sshll_asimdshf_l"_h) ? "sxtl" : "uxtl";
+ suffix = NULL;
+ }
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form), suffix);
+}
+
+void Disassembler::DisassembleNEONShiftRightImm(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, 'IsR";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ShiftImmFormatMap());
+
+ VectorFormat vform_dst = nfd.GetVectorFormat(0);
+ if (vform_dst != kFormatUndefined) {
+ uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst);
+ switch (form_hash_) {
+ case "scvtf_asimdshf_c"_h:
+ case "ucvtf_asimdshf_c"_h:
+ case "fcvtzs_asimdshf_c"_h:
+ case "fcvtzu_asimdshf_c"_h:
+ if (ls_dst == kBRegSize) {
+ mnemonic = NULL;
}
break;
- default:
- form = "(NEONShiftImmediate)";
}
- } else {
- form = "(NEONShiftImmediate)";
}
Format(instr, mnemonic, nfd.Substitute(form));
}
+void Disassembler::DisassembleNEONShiftRightNarrowImm(
+ const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, 'IsR";
+
+ NEONFormatDecoder nfd(instr,
+ NEONFormatDecoder::ShiftImmFormatMap(),
+ NEONFormatDecoder::ShiftLongNarrowImmFormatMap());
+ Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
+}
+
+void Disassembler::VisitNEONShiftImmediate(const Instruction *instr) {
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Vd.%s, 'Vn.%s, 'IsL";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::ShiftImmFormatMap());
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
void Disassembler::VisitNEONTable(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(NEONTable)";
+ const char *mnemonic = mnemonic_.c_str();
const char form_1v[] = "'Vd.%%s, {'Vn.16b}, 'Vm.%%s";
const char form_2v[] = "'Vd.%%s, {'Vn.16b, v%d.16b}, 'Vm.%%s";
const char form_3v[] = "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b}, 'Vm.%%s";
const char form_4v[] =
"'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b, v%d.16b}, 'Vm.%%s";
- static const NEONFormatMap map_b = {{30}, {NF_8B, NF_16B}};
- NEONFormatDecoder nfd(instr, &map_b);
+ const char *form = form_1v;
- switch (instr->Mask(NEONTableMask)) {
- case NEON_TBL_1v:
- mnemonic = "tbl";
- form = form_1v;
- break;
- case NEON_TBL_2v:
- mnemonic = "tbl";
- form = form_2v;
- break;
- case NEON_TBL_3v:
- mnemonic = "tbl";
- form = form_3v;
- break;
- case NEON_TBL_4v:
- mnemonic = "tbl";
- form = form_4v;
- break;
- case NEON_TBX_1v:
- mnemonic = "tbx";
- form = form_1v;
- break;
- case NEON_TBX_2v:
- mnemonic = "tbx";
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
+
+ switch (form_hash_) {
+ case "tbl_asimdtbl_l2_2"_h:
+ case "tbx_asimdtbl_l2_2"_h:
form = form_2v;
break;
- case NEON_TBX_3v:
- mnemonic = "tbx";
+ case "tbl_asimdtbl_l3_3"_h:
+ case "tbx_asimdtbl_l3_3"_h:
form = form_3v;
break;
- case NEON_TBX_4v:
- mnemonic = "tbx";
+ case "tbl_asimdtbl_l4_4"_h:
+ case "tbx_asimdtbl_l4_4"_h:
form = form_4v;
break;
- default:
- break;
}
+ VIXL_ASSERT(form != NULL);
- char re_form[sizeof(form_4v) + 6];
+ char re_form[sizeof(form_4v) + 6]; // 3 * two-digit substitutions => 6
int reg_num = instr->GetRn();
snprintf(re_form,
sizeof(re_form),
@@ -4895,179 +3447,52 @@ void Disassembler::VisitNEONTable(const Instruction *instr) {
void Disassembler::VisitNEONPerm(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
NEONFormatDecoder nfd(instr);
+ FormatWithDecodedMnemonic(instr, nfd.Substitute("'Vd.%s, 'Vn.%s, 'Vm.%s"));
+}
- switch (instr->Mask(NEONPermMask)) {
- case NEON_TRN1:
- mnemonic = "trn1";
- break;
- case NEON_TRN2:
- mnemonic = "trn2";
- break;
- case NEON_UZP1:
- mnemonic = "uzp1";
- break;
- case NEON_UZP2:
- mnemonic = "uzp2";
- break;
- case NEON_ZIP1:
- mnemonic = "zip1";
- break;
- case NEON_ZIP2:
- mnemonic = "zip2";
- break;
- default:
- form = "(NEONPerm)";
- }
- Format(instr, mnemonic, nfd.Substitute(form));
+void Disassembler::Disassemble_Vd4S_Vn16B_Vm16B(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr, "'Vd.4s, 'Vn.16b, 'Vm.16b");
}
void Disassembler::
VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]";
-
- switch (instr->Mask(
- SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
- case LD1H_z_p_bz_s_x32_scaled:
- mnemonic = "ld1h";
- break;
- case LD1SH_z_p_bz_s_x32_scaled:
- mnemonic = "ld1sh";
- break;
- case LDFF1H_z_p_bz_s_x32_scaled:
- mnemonic = "ldff1h";
- break;
- case LDFF1SH_z_p_bz_s_x32_scaled:
- mnemonic = "ldff1sh";
- break;
- default:
- form = "(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]");
}
void Disassembler::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]";
-
- switch (
- instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
- case LD1W_z_p_bz_s_x32_scaled:
- mnemonic = "ld1w";
- break;
- case LDFF1W_z_p_bz_s_x32_scaled:
- mnemonic = "ldff1w";
- break;
- default:
- form = "(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]");
}
void Disassembler::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
const Instruction *instr) {
- const char *form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]";
-
- const char *mnemonic = "unimplemented";
- switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
- case LD1B_z_p_bz_s_x32_unscaled:
- mnemonic = "ld1b";
- break;
- case LD1H_z_p_bz_s_x32_unscaled:
- mnemonic = "ld1h";
- break;
- case LD1SB_z_p_bz_s_x32_unscaled:
- mnemonic = "ld1sb";
- break;
- case LD1SH_z_p_bz_s_x32_unscaled:
- mnemonic = "ld1sh";
- break;
- case LD1W_z_p_bz_s_x32_unscaled:
- mnemonic = "ld1w";
- break;
- case LDFF1B_z_p_bz_s_x32_unscaled:
- mnemonic = "ldff1b";
- break;
- case LDFF1H_z_p_bz_s_x32_unscaled:
- mnemonic = "ldff1h";
- break;
- case LDFF1SB_z_p_bz_s_x32_unscaled:
- mnemonic = "ldff1sb";
- break;
- case LDFF1SH_z_p_bz_s_x32_unscaled:
- mnemonic = "ldff1sh";
- break;
- case LDFF1W_z_p_bz_s_x32_unscaled:
- mnemonic = "ldff1w";
- break;
- default:
- form = "(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]");
}
void Disassembler::VisitSVE32BitGatherLoad_VectorPlusImm(
const Instruction *instr) {
const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s]";
- const char *form_imm_b = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]";
+ const char *form_imm = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]";
const char *form_imm_h = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*2]";
const char *form_imm_w = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*4]";
- const char *form_imm;
- const char *mnemonic = "unimplemented";
- switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
- case LD1B_z_p_ai_s:
- mnemonic = "ld1b";
- form_imm = form_imm_b;
- break;
- case LD1H_z_p_ai_s:
- mnemonic = "ld1h";
- form_imm = form_imm_h;
- break;
- case LD1SB_z_p_ai_s:
- mnemonic = "ld1sb";
- form_imm = form_imm_b;
- break;
- case LD1SH_z_p_ai_s:
- mnemonic = "ld1sh";
- form_imm = form_imm_h;
- break;
- case LD1W_z_p_ai_s:
- mnemonic = "ld1w";
- form_imm = form_imm_w;
- break;
- case LDFF1B_z_p_ai_s:
- mnemonic = "ldff1b";
- form_imm = form_imm_b;
- break;
- case LDFF1H_z_p_ai_s:
- mnemonic = "ldff1h";
+ const char *mnemonic = mnemonic_.c_str();
+ switch (form_hash_) {
+ case "ld1h_z_p_ai_s"_h:
+ case "ld1sh_z_p_ai_s"_h:
+ case "ldff1h_z_p_ai_s"_h:
+ case "ldff1sh_z_p_ai_s"_h:
form_imm = form_imm_h;
break;
- case LDFF1SB_z_p_ai_s:
- mnemonic = "ldff1sb";
- form_imm = form_imm_b;
- break;
- case LDFF1SH_z_p_ai_s:
- mnemonic = "ldff1sh";
- form_imm = form_imm_h;
- break;
- case LDFF1W_z_p_ai_s:
- mnemonic = "ldff1w";
+ case "ld1w_z_p_ai_s"_h:
+ case "ldff1w_z_p_ai_s"_h:
form_imm = form_imm_w;
break;
- default:
- form = "(SVE32BitGatherLoad_VectorPlusImm)";
- form_imm = form;
- break;
}
if (instr->ExtractBits(20, 16) != 0) form = form_imm;
@@ -5107,70 +3532,21 @@ void Disassembler::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = (instr->ExtractBits(20, 16) != 0)
? "'prefSVEOp, 'Pgl, ['Zn.s, #'u2016]"
: "'prefSVEOp, 'Pgl, ['Zn.s]";
-
- switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
- case PRFB_i_p_ai_s:
- mnemonic = "prfb";
- break;
- case PRFD_i_p_ai_s:
- mnemonic = "prfd";
- break;
- case PRFH_i_p_ai_s:
- mnemonic = "prfh";
- break;
- case PRFW_i_p_ai_s:
- mnemonic = "prfw";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]";
-
- switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
- case ST1H_z_p_bz_s_x32_scaled:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_bz_s_x32_scaled:
- mnemonic = "st1w";
- break;
- default:
- form = "(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]");
}
void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]";
-
- switch (
- instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
- case ST1B_z_p_bz_s_x32_unscaled:
- mnemonic = "st1b";
- break;
- case ST1H_z_p_bz_s_x32_unscaled:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_bz_s_x32_unscaled:
- mnemonic = "st1w";
- break;
- default:
- form = "(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]");
}
void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm(
@@ -5203,200 +3579,27 @@ void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm(
void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw #'u2423]";
-
- switch (instr->Mask(
- SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
- case LD1D_z_p_bz_d_x32_scaled:
- mnemonic = "ld1d";
- break;
- case LD1H_z_p_bz_d_x32_scaled:
- mnemonic = "ld1h";
- break;
- case LD1SH_z_p_bz_d_x32_scaled:
- mnemonic = "ld1sh";
- break;
- case LD1SW_z_p_bz_d_x32_scaled:
- mnemonic = "ld1sw";
- break;
- case LD1W_z_p_bz_d_x32_scaled:
- mnemonic = "ld1w";
- break;
- case LDFF1D_z_p_bz_d_x32_scaled:
- mnemonic = "ldff1d";
- break;
- case LDFF1H_z_p_bz_d_x32_scaled:
- mnemonic = "ldff1h";
- break;
- case LDFF1SH_z_p_bz_d_x32_scaled:
- mnemonic = "ldff1sh";
- break;
- case LDFF1SW_z_p_bz_d_x32_scaled:
- mnemonic = "ldff1sw";
- break;
- case LDFF1W_z_p_bz_d_x32_scaled:
- mnemonic = "ldff1w";
- break;
- default:
- form = "(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw "
+ "#'u2423]");
}
void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]";
-
- switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
- case LD1D_z_p_bz_d_64_scaled:
- mnemonic = "ld1d";
- break;
- case LD1H_z_p_bz_d_64_scaled:
- mnemonic = "ld1h";
- break;
- case LD1SH_z_p_bz_d_64_scaled:
- mnemonic = "ld1sh";
- break;
- case LD1SW_z_p_bz_d_64_scaled:
- mnemonic = "ld1sw";
- break;
- case LD1W_z_p_bz_d_64_scaled:
- mnemonic = "ld1w";
- break;
- case LDFF1D_z_p_bz_d_64_scaled:
- mnemonic = "ldff1d";
- break;
- case LDFF1H_z_p_bz_d_64_scaled:
- mnemonic = "ldff1h";
- break;
- case LDFF1SH_z_p_bz_d_64_scaled:
- mnemonic = "ldff1sh";
- break;
- case LDFF1SW_z_p_bz_d_64_scaled:
- mnemonic = "ldff1sw";
- break;
- case LDFF1W_z_p_bz_d_64_scaled:
- mnemonic = "ldff1w";
- break;
- default:
- form = "(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]");
}
void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]";
-
- switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
- case LD1B_z_p_bz_d_64_unscaled:
- mnemonic = "ld1b";
- break;
- case LD1D_z_p_bz_d_64_unscaled:
- mnemonic = "ld1d";
- break;
- case LD1H_z_p_bz_d_64_unscaled:
- mnemonic = "ld1h";
- break;
- case LD1SB_z_p_bz_d_64_unscaled:
- mnemonic = "ld1sb";
- break;
- case LD1SH_z_p_bz_d_64_unscaled:
- mnemonic = "ld1sh";
- break;
- case LD1SW_z_p_bz_d_64_unscaled:
- mnemonic = "ld1sw";
- break;
- case LD1W_z_p_bz_d_64_unscaled:
- mnemonic = "ld1w";
- break;
- case LDFF1B_z_p_bz_d_64_unscaled:
- mnemonic = "ldff1b";
- break;
- case LDFF1D_z_p_bz_d_64_unscaled:
- mnemonic = "ldff1d";
- break;
- case LDFF1H_z_p_bz_d_64_unscaled:
- mnemonic = "ldff1h";
- break;
- case LDFF1SB_z_p_bz_d_64_unscaled:
- mnemonic = "ldff1sb";
- break;
- case LDFF1SH_z_p_bz_d_64_unscaled:
- mnemonic = "ldff1sh";
- break;
- case LDFF1SW_z_p_bz_d_64_unscaled:
- mnemonic = "ldff1sw";
- break;
- case LDFF1W_z_p_bz_d_64_unscaled:
- mnemonic = "ldff1w";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]");
}
void Disassembler::
VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]";
-
- switch (instr->Mask(
- SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
- case LD1B_z_p_bz_d_x32_unscaled:
- mnemonic = "ld1b";
- break;
- case LD1D_z_p_bz_d_x32_unscaled:
- mnemonic = "ld1d";
- break;
- case LD1H_z_p_bz_d_x32_unscaled:
- mnemonic = "ld1h";
- break;
- case LD1SB_z_p_bz_d_x32_unscaled:
- mnemonic = "ld1sb";
- break;
- case LD1SH_z_p_bz_d_x32_unscaled:
- mnemonic = "ld1sh";
- break;
- case LD1SW_z_p_bz_d_x32_unscaled:
- mnemonic = "ld1sw";
- break;
- case LD1W_z_p_bz_d_x32_unscaled:
- mnemonic = "ld1w";
- break;
- case LDFF1B_z_p_bz_d_x32_unscaled:
- mnemonic = "ldff1b";
- break;
- case LDFF1D_z_p_bz_d_x32_unscaled:
- mnemonic = "ldff1d";
- break;
- case LDFF1H_z_p_bz_d_x32_unscaled:
- mnemonic = "ldff1h";
- break;
- case LDFF1SB_z_p_bz_d_x32_unscaled:
- mnemonic = "ldff1sb";
- break;
- case LDFF1SH_z_p_bz_d_x32_unscaled:
- mnemonic = "ldff1sh";
- break;
- case LDFF1SW_z_p_bz_d_x32_unscaled:
- mnemonic = "ldff1sw";
- break;
- case LDFF1W_z_p_bz_d_x32_unscaled:
- mnemonic = "ldff1w";
- break;
- default:
- form = "(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]");
}
void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm(
@@ -5418,409 +3621,164 @@ void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm(
}
}
- const char *mnemonic = "unimplemented";
- switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
- case LD1B_z_p_ai_d:
- mnemonic = "ld1b";
- break;
- case LD1D_z_p_ai_d:
- mnemonic = "ld1d";
- break;
- case LD1H_z_p_ai_d:
- mnemonic = "ld1h";
- break;
- case LD1SB_z_p_ai_d:
- mnemonic = "ld1sb";
- break;
- case LD1SH_z_p_ai_d:
- mnemonic = "ld1sh";
- break;
- case LD1SW_z_p_ai_d:
- mnemonic = "ld1sw";
- break;
- case LD1W_z_p_ai_d:
- mnemonic = "ld1w";
- break;
- case LDFF1B_z_p_ai_d:
- mnemonic = "ldff1b";
- break;
- case LDFF1D_z_p_ai_d:
- mnemonic = "ldff1d";
- break;
- case LDFF1H_z_p_ai_d:
- mnemonic = "ldff1h";
- break;
- case LDFF1SB_z_p_ai_d:
- mnemonic = "ldff1sb";
- break;
- case LDFF1SH_z_p_ai_d:
- mnemonic = "ldff1sh";
- break;
- case LDFF1SW_z_p_ai_d:
- mnemonic = "ldff1sw";
- break;
- case LDFF1W_z_p_ai_d:
- mnemonic = "ldff1w";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)";
+ const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d";
+ const char *suffix = "]";
- switch (
- instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
- case PRFB_i_p_bz_d_64_scaled:
- mnemonic = "prfb";
- form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d]";
- break;
- case PRFD_i_p_bz_d_64_scaled:
- mnemonic = "prfd";
- form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #3]";
- break;
- case PRFH_i_p_bz_d_64_scaled:
- mnemonic = "prfh";
- form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #1]";
+ switch (form_hash_) {
+ case "prfh_i_p_bz_d_64_scaled"_h:
+ suffix = ", lsl #1]";
break;
- case PRFW_i_p_bz_d_64_scaled:
- mnemonic = "prfw";
- form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, lsl #2]";
+ case "prfs_i_p_bz_d_64_scaled"_h:
+ suffix = ", lsl #2]";
break;
- default:
+ case "prfd_i_p_bz_d_64_scaled"_h:
+ suffix = ", lsl #3]";
break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::
VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw";
- const char *suffix = NULL;
+ const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw ";
+ const char *suffix = "]";
- switch (instr->Mask(
- SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
- case PRFB_i_p_bz_d_x32_scaled:
- mnemonic = "prfb";
- suffix = " ]";
- break;
- case PRFD_i_p_bz_d_x32_scaled:
- mnemonic = "prfd";
- suffix = " #3]";
+ switch (form_hash_) {
+ case "prfh_i_p_bz_d_x32_scaled"_h:
+ suffix = "#1]";
break;
- case PRFH_i_p_bz_d_x32_scaled:
- mnemonic = "prfh";
- suffix = " #1]";
+ case "prfs_i_p_bz_d_x32_scaled"_h:
+ suffix = "#2]";
break;
- case PRFW_i_p_bz_d_x32_scaled:
- mnemonic = "prfw";
- suffix = " #2]";
- break;
- default:
- form = "(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets)";
+ case "prfd_i_p_bz_d_x32_scaled"_h:
+ suffix = "#3]";
break;
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVE64BitGatherPrefetch_VectorPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = (instr->ExtractBits(20, 16) != 0)
? "'prefSVEOp, 'Pgl, ['Zn.d, #'u2016]"
: "'prefSVEOp, 'Pgl, ['Zn.d]";
- switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
- case PRFB_i_p_ai_d:
- mnemonic = "prfb";
- break;
- case PRFD_i_p_ai_d:
- mnemonic = "prfd";
- break;
- case PRFH_i_p_ai_d:
- mnemonic = "prfh";
- break;
- case PRFW_i_p_ai_d:
- mnemonic = "prfw";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]";
-
- switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
- case ST1D_z_p_bz_d_64_scaled:
- mnemonic = "st1d";
- break;
- case ST1H_z_p_bz_d_64_scaled:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_bz_d_64_scaled:
- mnemonic = "st1w";
- break;
- default:
- form = "(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]");
}
void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]";
-
- switch (
- instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
- case ST1B_z_p_bz_d_64_unscaled:
- mnemonic = "st1b";
- break;
- case ST1D_z_p_bz_d_64_unscaled:
- mnemonic = "st1d";
- break;
- case ST1H_z_p_bz_d_64_unscaled:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_bz_d_64_unscaled:
- mnemonic = "st1w";
- break;
- default:
- form = "(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffset)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]");
}
void Disassembler::
VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]";
-
- switch (instr->Mask(
- SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
- case ST1D_z_p_bz_d_x32_scaled:
- mnemonic = "st1d";
- break;
- case ST1H_z_p_bz_d_x32_scaled:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_bz_d_x32_scaled:
- mnemonic = "st1w";
- break;
- default:
- form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr,
+ "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]");
}
void Disassembler::
VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]";
-
- switch (instr->Mask(
- SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
- case ST1B_z_p_bz_d_x32_unscaled:
- mnemonic = "st1b";
- break;
- case ST1D_z_p_bz_d_x32_unscaled:
- mnemonic = "st1d";
- break;
- case ST1H_z_p_bz_d_x32_unscaled:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_bz_d_x32_unscaled:
- mnemonic = "st1w";
- break;
- default:
- form = "(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]");
}
void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "{'Zt.d}, 'Pgl, ['Zn.d";
- const char *suffix = NULL;
+ const char *suffix = "]";
- bool is_zero = instr->ExtractBits(20, 16) == 0;
-
- switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
- case ST1B_z_p_ai_d:
- mnemonic = "st1b";
- suffix = is_zero ? "]" : ", #'u2016]";
- break;
- case ST1D_z_p_ai_d:
- mnemonic = "st1d";
- suffix = is_zero ? "]" : ", #'u2016*8]";
- break;
- case ST1H_z_p_ai_d:
- mnemonic = "st1h";
- suffix = is_zero ? "]" : ", #'u2016*2]";
- break;
- case ST1W_z_p_ai_d:
- mnemonic = "st1w";
- suffix = is_zero ? "]" : ", #'u2016*4]";
- break;
- default:
- form = "(SVE64BitScatterStore_VectorPlusImm)";
- break;
+ if (instr->ExtractBits(20, 16) != 0) {
+ switch (form_hash_) {
+ case "st1b_z_p_ai_d"_h:
+ suffix = ", #'u2016]";
+ break;
+ case "st1h_z_p_ai_d"_h:
+ suffix = ", #'u2016*2]";
+ break;
+ case "st1w_z_p_ai_d"_h:
+ suffix = ", #'u2016*4]";
+ break;
+ case "st1d_z_p_ai_d"_h:
+ suffix = ", #'u2016*8]";
+ break;
+ }
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEBitwiseLogicalWithImm_Unpredicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'tl, 'Zd.'tl, 'ITriSvel";
-
if (instr->GetSVEImmLogical() == 0) {
// The immediate encoded in the instruction is not in the expected format.
Format(instr, "unallocated", "(SVEBitwiseImm)");
- return;
- }
-
- switch (instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) {
- case AND_z_zi:
- mnemonic = "and";
- break;
- case EOR_z_zi:
- mnemonic = "eor";
- break;
- case ORR_z_zi:
- mnemonic = "orr";
- break;
- default:
- break;
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Zd.'tl, 'Zd.'tl, 'ITriSvel");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEBitwiseLogical_Predicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
- case AND_z_p_zz:
- mnemonic = "and";
- break;
- case BIC_z_p_zz:
- mnemonic = "bic";
- break;
- case EOR_z_p_zz:
- mnemonic = "eor";
- break;
- case ORR_z_p_zz:
- mnemonic = "orr";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVEBitwiseShiftByImm_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, 'ITriSveq";
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, ";
+ const char *suffix = NULL;
unsigned tsize = (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(9, 8);
if (tsize == 0) {
+ mnemonic = "unimplemented";
form = "(SVEBitwiseShiftByImm_Predicated)";
} else {
- switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
- case ASRD_z_p_zi:
- mnemonic = "asrd";
- break;
- case ASR_z_p_zi:
- mnemonic = "asr";
- break;
- case LSL_z_p_zi:
- mnemonic = "lsl";
- form = "'Zd.'tszp, p'u1210/m, 'Zd.'tszp, 'ITriSvep";
- break;
- case LSR_z_p_zi:
- mnemonic = "lsr";
+ switch (form_hash_) {
+ case "lsl_z_p_zi"_h:
+ case "sqshl_z_p_zi"_h:
+ case "sqshlu_z_p_zi"_h:
+ case "uqshl_z_p_zi"_h:
+ suffix = "'ITriSvep";
+ break;
+ case "asrd_z_p_zi"_h:
+ case "asr_z_p_zi"_h:
+ case "lsr_z_p_zi"_h:
+ case "srshr_z_p_zi"_h:
+ case "urshr_z_p_zi"_h:
+ suffix = "'ITriSveq";
break;
default:
+ mnemonic = "unimplemented";
+ form = "(SVEBitwiseShiftByImm_Predicated)";
break;
}
}
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic, form, suffix);
}
void Disassembler::VisitSVEBitwiseShiftByVector_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) {
- case ASRR_z_p_zz:
- mnemonic = "asrr";
- break;
- case ASR_z_p_zz:
- mnemonic = "asr";
- break;
- case LSLR_z_p_zz:
- mnemonic = "lslr";
- break;
- case LSL_z_p_zz:
- mnemonic = "lsl";
- break;
- case LSRR_z_p_zz:
- mnemonic = "lsrr";
- break;
- case LSR_z_p_zz:
- mnemonic = "lsr";
- break;
- default:
- form = "(SVEBitwiseShiftByVector_Predicated)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVEBitwiseShiftByWideElements_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d";
-
if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
- form = "(SVEBitwiseShiftByWideElements_Predicated)";
+ Format(instr, "unallocated", "(SVEBitwiseShiftByWideElements_Predicated)");
} else {
- switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
- case ASR_z_p_zw:
- mnemonic = "asr";
- break;
- case LSL_z_p_zw:
- mnemonic = "lsl";
- break;
- case LSR_z_p_zw:
- mnemonic = "lsr";
- break;
- default:
- form = "(SVEBitwiseShiftByWideElements_Predicated)";
- break;
- }
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d");
}
- Format(instr, mnemonic, form);
}
static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) {
@@ -5929,14 +3887,16 @@ void Disassembler::VisitSVEBroadcastFPImm_Unpredicated(
const char *mnemonic = "unimplemented";
const char *form = "(SVEBroadcastFPImm_Unpredicated)";
- switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
- case FDUP_z_i:
- // The preferred disassembly for fdup is "fmov".
- mnemonic = "fmov";
- form = "'Zd.'t, 'IFPSve";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() != kFormatVnB) {
+ switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
+ case FDUP_z_i:
+ // The preferred disassembly for fdup is "fmov".
+ mnemonic = "fmov";
+ form = "'Zd.'t, 'IFPSve";
+ break;
+ default:
+ break;
+ }
}
Format(instr, mnemonic, form);
}
@@ -5976,9 +3936,9 @@ void Disassembler::VisitSVEBroadcastIndexElement(const Instruction *instr) {
if ((CountSetBits(imm2) + CountSetBits(tsz)) == 1) {
// If imm2:tsz has one set bit, the index is zero. This is
// disassembled as a mov from a b/h/s/d/q scalar register.
- form = "'Zd.'tszx, 'tszx'u0905";
+ form = "'Zd.'ti, 'ti'u0905";
} else {
- form = "'Zd.'tszx, 'Zn.'tszx['IVInsSVEIndex]";
+ form = "'Zd.'ti, 'Zn.'ti['IVInsSVEIndex]";
}
}
break;
@@ -6013,304 +3973,145 @@ void Disassembler::VisitSVEBroadcastIntImm_Unpredicated(
}
void Disassembler::VisitSVECompressActiveElements(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVECompressActiveElements)";
-
- switch (instr->Mask(SVECompressActiveElementsMask)) {
- case COMPACT_z_p_z:
- // The top bit of size is always set for compact, so 't can only be
- // substituted with types S and D.
- VIXL_ASSERT(instr->ExtractBit(23) == 1);
- mnemonic = "compact";
- form = "'Zd.'t, 'Pgl, 'Zn.'t";
- break;
- default:
- break;
+ // The top bit of size is always set for compact, so 't can only be
+ // substituted with types S and D.
+ if (instr->ExtractBit(23) == 1) {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zn.'t");
+ } else {
+ VisitUnallocated(instr);
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEConditionallyBroadcastElementToVector(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
- case CLASTA_z_p_zz:
- mnemonic = "clasta";
- break;
- case CLASTB_z_p_zz:
- mnemonic = "clastb";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVEConditionallyExtractElementToGeneralRegister(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "'Wd, 'Pgl, 'Wd, 'Zn.'t";
if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
form = "'Xd, p'u1210, 'Xd, 'Zn.'t";
}
-
- switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
- case CLASTA_r_p_z:
- mnemonic = "clasta";
- break;
- case CLASTB_r_p_z:
- mnemonic = "clastb";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEConditionallyExtractElementToSIMDFPScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t";
-
- switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
- case CLASTA_v_p_z:
- mnemonic = "clasta";
- break;
- case CLASTB_v_p_z:
- mnemonic = "clastb";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t");
}
void Disassembler::VisitSVEConditionallyTerminateScalars(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = (instr->ExtractBit(22) == 0) ? "'Wn, 'Wm" : "'Xn, 'Xm";
-
- switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
- case CTERMEQ_rr:
- mnemonic = "ctermeq";
- break;
- case CTERMNE_rr:
- mnemonic = "ctermne";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEConstructivePrefix_Unpredicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEConstructivePrefix_Unpredicated)";
-
- switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
- case MOVPRFX_z_z:
- mnemonic = "movprfx";
- form = "'Zd, 'Zn";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd, 'Zn");
}
void Disassembler::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
-
- bool rm_is_zr = instr->GetRm() == kZeroRegCode;
-
const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns";
- const char *suffix = NULL;
+ const char *suffix = "]";
- switch (instr->Mask(SVEContiguousFirstFaultLoad_ScalarPlusScalarMask)) {
- case LDFF1B_z_p_br_u16:
- case LDFF1B_z_p_br_u32:
- case LDFF1B_z_p_br_u64:
- case LDFF1B_z_p_br_u8:
- mnemonic = "ldff1b";
- suffix = rm_is_zr ? "]" : ", 'Xm]";
- break;
- case LDFF1D_z_p_br_u64:
- mnemonic = "ldff1d";
- suffix = rm_is_zr ? "]" : ", 'Xm, lsl #3]";
- break;
- case LDFF1H_z_p_br_u16:
- case LDFF1H_z_p_br_u32:
- case LDFF1H_z_p_br_u64:
- mnemonic = "ldff1h";
- suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]";
- break;
- case LDFF1SB_z_p_br_s16:
- case LDFF1SB_z_p_br_s32:
- case LDFF1SB_z_p_br_s64:
- mnemonic = "ldff1sb";
- suffix = rm_is_zr ? "]" : ", 'Xm]";
- break;
- case LDFF1SH_z_p_br_s32:
- case LDFF1SH_z_p_br_s64:
- mnemonic = "ldff1sh";
- suffix = rm_is_zr ? "]" : ", 'Xm, lsl #1]";
- break;
- case LDFF1SW_z_p_br_s64:
- mnemonic = "ldff1sw";
- suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]";
- break;
- case LDFF1W_z_p_br_u32:
- case LDFF1W_z_p_br_u64:
- mnemonic = "ldff1w";
- suffix = rm_is_zr ? "]" : ", 'Xm, lsl #2]";
- break;
- default:
- form = "(SVEContiguousFirstFaultLoad_ScalarPlusScalar)";
- break;
+ if (instr->GetRm() != kZeroRegCode) {
+ switch (form_hash_) {
+ case "ldff1b_z_p_br_u8"_h:
+ case "ldff1b_z_p_br_u16"_h:
+ case "ldff1b_z_p_br_u32"_h:
+ case "ldff1b_z_p_br_u64"_h:
+ case "ldff1sb_z_p_br_s16"_h:
+ case "ldff1sb_z_p_br_s32"_h:
+ case "ldff1sb_z_p_br_s64"_h:
+ suffix = ", 'Xm]";
+ break;
+ case "ldff1h_z_p_br_u16"_h:
+ case "ldff1h_z_p_br_u32"_h:
+ case "ldff1h_z_p_br_u64"_h:
+ case "ldff1sh_z_p_br_s32"_h:
+ case "ldff1sh_z_p_br_s64"_h:
+ suffix = ", 'Xm, lsl #1]";
+ break;
+ case "ldff1w_z_p_br_u32"_h:
+ case "ldff1w_z_p_br_u64"_h:
+ case "ldff1sw_z_p_br_s64"_h:
+ suffix = ", 'Xm, lsl #2]";
+ break;
+ case "ldff1d_z_p_br_u64"_h:
+ suffix = ", 'Xm, lsl #3]";
+ break;
+ }
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns";
const char *suffix =
(instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
-
- switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
- case LDNF1B_z_p_bi_u16:
- case LDNF1B_z_p_bi_u32:
- case LDNF1B_z_p_bi_u64:
- case LDNF1B_z_p_bi_u8:
- mnemonic = "ldnf1b";
- break;
- case LDNF1D_z_p_bi_u64:
- mnemonic = "ldnf1d";
- break;
- case LDNF1H_z_p_bi_u16:
- case LDNF1H_z_p_bi_u32:
- case LDNF1H_z_p_bi_u64:
- mnemonic = "ldnf1h";
- break;
- case LDNF1SB_z_p_bi_s16:
- case LDNF1SB_z_p_bi_s32:
- case LDNF1SB_z_p_bi_s64:
- mnemonic = "ldnf1sb";
- break;
- case LDNF1SH_z_p_bi_s32:
- case LDNF1SH_z_p_bi_s64:
- mnemonic = "ldnf1sh";
- break;
- case LDNF1SW_z_p_bi_s64:
- mnemonic = "ldnf1sw";
- break;
- case LDNF1W_z_p_bi_u32:
- case LDNF1W_z_p_bi_u64:
- mnemonic = "ldnf1w";
- break;
- default:
- form = "(SVEContiguousNonFaultLoad_ScalarPlusImm)";
- suffix = NULL;
- break;
- }
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusImm)";
-
+ const char *form = "{'Zt.b}, 'Pgl/z, ['Xns";
const char *suffix =
(instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
- switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
- case LDNT1B_z_p_bi_contiguous:
- mnemonic = "ldnt1b";
- form = "{'Zt.b}, 'Pgl/z, ['Xns";
- break;
- case LDNT1D_z_p_bi_contiguous:
- mnemonic = "ldnt1d";
+ switch (form_hash_) {
+ case "ldnt1d_z_p_bi_contiguous"_h:
form = "{'Zt.d}, 'Pgl/z, ['Xns";
break;
- case LDNT1H_z_p_bi_contiguous:
- mnemonic = "ldnt1h";
+ case "ldnt1h_z_p_bi_contiguous"_h:
form = "{'Zt.h}, 'Pgl/z, ['Xns";
break;
- case LDNT1W_z_p_bi_contiguous:
- mnemonic = "ldnt1w";
+ case "ldnt1w_z_p_bi_contiguous"_h:
form = "{'Zt.s}, 'Pgl/z, ['Xns";
break;
- default:
- suffix = NULL;
- break;
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEContiguousNonTemporalLoad_ScalarPlusScalar)";
-
- switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
- case LDNT1B_z_p_br_contiguous:
- mnemonic = "ldnt1b";
- form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]";
- break;
- case LDNT1D_z_p_br_contiguous:
- mnemonic = "ldnt1d";
+ const char *form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]";
+ switch (form_hash_) {
+ case "ldnt1d_z_p_br_contiguous"_h:
form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]";
break;
- case LDNT1H_z_p_br_contiguous:
- mnemonic = "ldnt1h";
+ case "ldnt1h_z_p_br_contiguous"_h:
form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]";
break;
- case LDNT1W_z_p_br_contiguous:
- mnemonic = "ldnt1w";
+ case "ldnt1w_z_p_br_contiguous"_h:
form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]";
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusImm)";
-
+ const char *form = "{'Zt.b}, 'Pgl, ['Xns";
const char *suffix =
(instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
- switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
- case STNT1B_z_p_bi_contiguous:
- mnemonic = "stnt1b";
- form = "{'Zt.b}, 'Pgl, ['Xns";
- break;
- case STNT1D_z_p_bi_contiguous:
- mnemonic = "stnt1d";
+
+ switch (form_hash_) {
+ case "stnt1d_z_p_bi_contiguous"_h:
form = "{'Zt.d}, 'Pgl, ['Xns";
break;
- case STNT1H_z_p_bi_contiguous:
- mnemonic = "stnt1h";
+ case "stnt1h_z_p_bi_contiguous"_h:
form = "{'Zt.h}, 'Pgl, ['Xns";
break;
- case STNT1W_z_p_bi_contiguous:
- mnemonic = "stnt1w";
+ case "stnt1w_z_p_bi_contiguous"_h:
form = "{'Zt.s}, 'Pgl, ['Xns";
break;
- default:
- suffix = NULL;
- break;
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
@@ -6343,28 +4144,10 @@ void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = (instr->ExtractBits(21, 16) != 0)
? "'prefSVEOp, 'Pgl, ['Xns, #'s2116, mul vl]"
: "'prefSVEOp, 'Pgl, ['Xns]";
-
- switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
- case PRFB_i_p_bi_s:
- mnemonic = "prfb";
- break;
- case PRFD_i_p_bi_s:
- mnemonic = "prfd";
- break;
- case PRFH_i_p_bi_s:
- mnemonic = "prfh";
- break;
- case PRFW_i_p_bi_s:
- mnemonic = "prfw";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar(
@@ -6399,71 +4182,34 @@ void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar(
void Disassembler::VisitSVEContiguousStore_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
-
// The 'size' field isn't in the usual place here.
const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, #'s1916, mul vl]";
if (instr->ExtractBits(19, 16) == 0) {
form = "{'Zt.'tls}, 'Pgl, ['Xns]";
}
-
- switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
- case ST1B_z_p_bi:
- mnemonic = "st1b";
- break;
- case ST1D_z_p_bi:
- mnemonic = "st1d";
- break;
- case ST1H_z_p_bi:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_bi:
- mnemonic = "st1w";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEContiguousStore_ScalarPlusScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
-
// The 'size' field isn't in the usual place here.
- const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]";
-
- switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
- case ST1B_z_p_br:
- mnemonic = "st1b";
- break;
- case ST1D_z_p_br:
- mnemonic = "st1d";
- break;
- case ST1H_z_p_br:
- mnemonic = "st1h";
- break;
- case ST1W_z_p_br:
- mnemonic = "st1w";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]");
}
void Disassembler::VisitSVECopyFPImm_Predicated(const Instruction *instr) {
const char *mnemonic = "unimplemented";
const char *form = "(SVECopyFPImm_Predicated)";
- switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
- case FCPY_z_p_i:
- // The preferred disassembly for fcpy is "fmov".
- mnemonic = "fmov";
- form = "'Zd.'t, 'Pm/m, 'IFPSve";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() != kFormatVnB) {
+ switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
+ case FCPY_z_p_i:
+ // The preferred disassembly for fcpy is "fmov".
+ mnemonic = "fmov";
+ form = "'Zd.'t, 'Pm/m, 'IFPSve";
+ break;
+ default:
+ break;
+ }
}
Format(instr, mnemonic, form);
}
@@ -6526,381 +4272,168 @@ void Disassembler::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
void Disassembler::VisitSVEExtractElementToGeneralRegister(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "'Wd, 'Pgl, 'Zn.'t";
-
if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
form = "'Xd, p'u1210, 'Zn.'t";
}
-
- switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
- case LASTA_r_p_z:
- mnemonic = "lasta";
- break;
- case LASTB_r_p_z:
- mnemonic = "lastb";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEExtractElementToSIMDFPScalarRegister(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'t'u0400, 'Pgl, 'Zn.'t";
-
- switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
- case LASTA_v_p_z:
- mnemonic = "lasta";
- break;
- case LASTB_v_p_z:
- mnemonic = "lastb";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 'Zn.'t");
}
void Disassembler::VisitSVEFFRInitialise(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFFRInitialise)";
-
- switch (instr->Mask(SVEFFRInitialiseMask)) {
- case SETFFR_f:
- mnemonic = "setffr";
- form = " ";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ DisassembleNoArgs(instr);
}
void Disassembler::VisitSVEFFRWriteFromPredicate(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFFRWriteFromPredicate)";
-
- switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
- case WRFFR_f_p:
- mnemonic = "wrffr";
- form = "'Pn.b";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pn.b");
}
void Disassembler::VisitSVEFPArithmeticWithImm_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form00 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.0";
- const char *form05 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #0.5";
- const char *form10 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #1.0";
- const char *form20 = "'Zd.'t, 'Pgl/m, 'Zd.'t, #2.0";
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, #";
+ const char *suffix00 = "0.0";
+ const char *suffix05 = "0.5";
+ const char *suffix10 = "1.0";
+ const char *suffix20 = "2.0";
int i1 = instr->ExtractBit(5);
- const char *form = i1 ? form10 : form00;
+ const char *suffix = i1 ? suffix10 : suffix00;
- switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
- case FADD_z_p_zs:
- mnemonic = "fadd";
- form = i1 ? form10 : form05;
- break;
- case FMAXNM_z_p_zs:
- mnemonic = "fmaxnm";
- break;
- case FMAX_z_p_zs:
- mnemonic = "fmax";
- break;
- case FMINNM_z_p_zs:
- mnemonic = "fminnm";
- break;
- case FMIN_z_p_zs:
- mnemonic = "fmin";
- break;
- case FMUL_z_p_zs:
- mnemonic = "fmul";
- form = i1 ? form20 : form05;
- break;
- case FSUBR_z_p_zs:
- mnemonic = "fsubr";
- form = i1 ? form10 : form05;
- break;
- case FSUB_z_p_zs:
- mnemonic = "fsub";
- form = i1 ? form10 : form05;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ return;
+ }
+
+ switch (form_hash_) {
+ case "fadd_z_p_zs"_h:
+ case "fsubr_z_p_zs"_h:
+ case "fsub_z_p_zs"_h:
+ suffix = i1 ? suffix10 : suffix05;
break;
- default:
- form = "(SVEFPArithmeticWithImm_Predicated)";
+ case "fmul_z_p_zs"_h:
+ suffix = i1 ? suffix20 : suffix05;
break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEFPArithmetic_Predicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
- case FABD_z_p_zz:
- mnemonic = "fabd";
- break;
- case FADD_z_p_zz:
- mnemonic = "fadd";
- break;
- case FDIVR_z_p_zz:
- mnemonic = "fdivr";
- break;
- case FDIV_z_p_zz:
- mnemonic = "fdiv";
- break;
- case FMAXNM_z_p_zz:
- mnemonic = "fmaxnm";
- break;
- case FMAX_z_p_zz:
- mnemonic = "fmax";
- break;
- case FMINNM_z_p_zz:
- mnemonic = "fminnm";
- break;
- case FMIN_z_p_zz:
- mnemonic = "fmin";
- break;
- case FMULX_z_p_zz:
- mnemonic = "fmulx";
- break;
- case FMUL_z_p_zz:
- mnemonic = "fmul";
- break;
- case FSCALE_z_p_zz:
- mnemonic = "fscale";
- break;
- case FSUBR_z_p_zz:
- mnemonic = "fsubr";
- break;
- case FSUB_z_p_zz:
- mnemonic = "fsub";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPConvertPrecision(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPConvertPrecision)";
+ const char *form = NULL;
- switch (instr->Mask(SVEFPConvertPrecisionMask)) {
- case FCVT_z_p_z_d2h:
- mnemonic = "fcvt";
+ switch (form_hash_) {
+ case "fcvt_z_p_z_d2h"_h:
form = "'Zd.h, 'Pgl/m, 'Zn.d";
break;
- case FCVT_z_p_z_d2s:
- mnemonic = "fcvt";
+ case "fcvt_z_p_z_d2s"_h:
form = "'Zd.s, 'Pgl/m, 'Zn.d";
break;
- case FCVT_z_p_z_h2d:
- mnemonic = "fcvt";
+ case "fcvt_z_p_z_h2d"_h:
form = "'Zd.d, 'Pgl/m, 'Zn.h";
break;
- case FCVT_z_p_z_h2s:
- mnemonic = "fcvt";
+ case "fcvt_z_p_z_h2s"_h:
form = "'Zd.s, 'Pgl/m, 'Zn.h";
break;
- case FCVT_z_p_z_s2d:
- mnemonic = "fcvt";
+ case "fcvt_z_p_z_s2d"_h:
form = "'Zd.d, 'Pgl/m, 'Zn.s";
break;
- case FCVT_z_p_z_s2h:
- mnemonic = "fcvt";
+ case "fcvt_z_p_z_s2h"_h:
form = "'Zd.h, 'Pgl/m, 'Zn.s";
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEFPConvertToInt(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPConvertToInt)";
+ const char *form = NULL;
- switch (instr->Mask(SVEFPConvertToIntMask)) {
- case FCVTZS_z_p_z_d2w:
- mnemonic = "fcvtzs";
- form = "'Zd.s, 'Pgl/m, 'Zn.d";
- break;
- case FCVTZS_z_p_z_d2x:
- mnemonic = "fcvtzs";
- form = "'Zd.d, 'Pgl/m, 'Zn.d";
- break;
- case FCVTZS_z_p_z_fp162h:
- mnemonic = "fcvtzs";
- form = "'Zd.h, 'Pgl/m, 'Zn.h";
- break;
- case FCVTZS_z_p_z_fp162w:
- mnemonic = "fcvtzs";
- form = "'Zd.s, 'Pgl/m, 'Zn.h";
- break;
- case FCVTZS_z_p_z_fp162x:
- mnemonic = "fcvtzs";
- form = "'Zd.d, 'Pgl/m, 'Zn.h";
- break;
- case FCVTZS_z_p_z_s2w:
- mnemonic = "fcvtzs";
- form = "'Zd.s, 'Pgl/m, 'Zn.s";
- break;
- case FCVTZS_z_p_z_s2x:
- mnemonic = "fcvtzs";
- form = "'Zd.d, 'Pgl/m, 'Zn.s";
- break;
- case FCVTZU_z_p_z_d2w:
- mnemonic = "fcvtzu";
+ switch (form_hash_) {
+ case "fcvtzs_z_p_z_d2w"_h:
+ case "fcvtzu_z_p_z_d2w"_h:
form = "'Zd.s, 'Pgl/m, 'Zn.d";
break;
- case FCVTZU_z_p_z_d2x:
- mnemonic = "fcvtzu";
+ case "fcvtzs_z_p_z_d2x"_h:
+ case "fcvtzu_z_p_z_d2x"_h:
form = "'Zd.d, 'Pgl/m, 'Zn.d";
break;
- case FCVTZU_z_p_z_fp162h:
- mnemonic = "fcvtzu";
+ case "fcvtzs_z_p_z_fp162h"_h:
+ case "fcvtzu_z_p_z_fp162h"_h:
form = "'Zd.h, 'Pgl/m, 'Zn.h";
break;
- case FCVTZU_z_p_z_fp162w:
- mnemonic = "fcvtzu";
+ case "fcvtzs_z_p_z_fp162w"_h:
+ case "fcvtzu_z_p_z_fp162w"_h:
form = "'Zd.s, 'Pgl/m, 'Zn.h";
break;
- case FCVTZU_z_p_z_fp162x:
- mnemonic = "fcvtzu";
+ case "fcvtzs_z_p_z_fp162x"_h:
+ case "fcvtzu_z_p_z_fp162x"_h:
form = "'Zd.d, 'Pgl/m, 'Zn.h";
break;
- case FCVTZU_z_p_z_s2w:
- mnemonic = "fcvtzu";
+ case "fcvtzs_z_p_z_s2w"_h:
+ case "fcvtzu_z_p_z_s2w"_h:
form = "'Zd.s, 'Pgl/m, 'Zn.s";
break;
- case FCVTZU_z_p_z_s2x:
- mnemonic = "fcvtzu";
+ case "fcvtzs_z_p_z_s2x"_h:
+ case "fcvtzu_z_p_z_s2x"_h:
form = "'Zd.d, 'Pgl/m, 'Zn.s";
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEFPExponentialAccelerator(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPExponentialAccelerator)";
-
unsigned size = instr->GetSVESize();
- switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
- case FEXPA_z_z:
- if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
- (size == kDRegSizeInBytesLog2)) {
- mnemonic = "fexpa";
- form = "'Zd.'t, 'Zn.'t";
- }
- break;
- default:
- break;
+ if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
+ (size == kDRegSizeInBytesLog2)) {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t");
+ } else {
+ VisitUnallocated(instr);
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPRoundToIntegralValue(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
-
- switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
- case FRINTA_z_p_z:
- mnemonic = "frinta";
- break;
- case FRINTI_z_p_z:
- mnemonic = "frinti";
- break;
- case FRINTM_z_p_z:
- mnemonic = "frintm";
- break;
- case FRINTN_z_p_z:
- mnemonic = "frintn";
- break;
- case FRINTP_z_p_z:
- mnemonic = "frintp";
- break;
- case FRINTX_z_p_z:
- mnemonic = "frintx";
- break;
- case FRINTZ_z_p_z:
- mnemonic = "frintz";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPTrigMulAddCoefficient(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPTrigMulAddCoefficient)";
-
unsigned size = instr->GetSVESize();
- switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
- case FTMAD_z_zzi:
- if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
- (size == kDRegSizeInBytesLog2)) {
- mnemonic = "ftmad";
- form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816";
- }
- break;
- default:
- break;
+ if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
+ (size == kDRegSizeInBytesLog2)) {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816");
+ } else {
+ VisitUnallocated(instr);
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPTrigSelectCoefficient(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPTrigSelectCoefficient)";
-
unsigned size = instr->GetSVESize();
- switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
- case FTSSEL_z_zz:
- if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
- (size == kDRegSizeInBytesLog2)) {
- mnemonic = "ftssel";
- form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
- }
- break;
- default:
- break;
+ if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) ||
+ (size == kDRegSizeInBytesLog2)) {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t");
+ } else {
+ VisitUnallocated(instr);
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPUnaryOp(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
-
if (instr->GetSVESize() == kBRegSizeInBytesLog2) {
- form = "(SVEFPUnaryOp)";
+ VisitUnallocated(instr);
} else {
- switch (instr->Mask(SVEFPUnaryOpMask)) {
- case FRECPX_z_p_z:
- mnemonic = "frecpx";
- break;
- case FSQRT_z_p_z:
- mnemonic = "fsqrt";
- break;
- default:
- form = "(SVEFPUnaryOp)";
- break;
- }
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t");
}
- Format(instr, mnemonic, form);
}
static const char *IncDecFormHelper(const Instruction *instr,
@@ -6921,399 +4454,125 @@ static const char *IncDecFormHelper(const Instruction *instr,
void Disassembler::VisitSVEIncDecRegisterByElementCount(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form =
IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd");
-
- switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
- case DECB_r_rs:
- mnemonic = "decb";
- break;
- case DECD_r_rs:
- mnemonic = "decd";
- break;
- case DECH_r_rs:
- mnemonic = "dech";
- break;
- case DECW_r_rs:
- mnemonic = "decw";
- break;
- case INCB_r_rs:
- mnemonic = "incb";
- break;
- case INCD_r_rs:
- mnemonic = "incd";
- break;
- case INCH_r_rs:
- mnemonic = "inch";
- break;
- case INCW_r_rs:
- mnemonic = "incw";
- break;
- default:
- form = "(SVEIncDecRegisterByElementCount)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEIncDecVectorByElementCount(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = IncDecFormHelper(instr,
"'Zd.'t, 'Ipc, mul #'u1916+1",
"'Zd.'t, 'Ipc",
"'Zd.'t");
-
- switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
- case DECD_z_zs:
- mnemonic = "decd";
- break;
- case DECH_z_zs:
- mnemonic = "dech";
- break;
- case DECW_z_zs:
- mnemonic = "decw";
- break;
- case INCD_z_zs:
- mnemonic = "incd";
- break;
- case INCH_z_zs:
- mnemonic = "inch";
- break;
- case INCW_z_zs:
- mnemonic = "incw";
- break;
- default:
- form = "(SVEIncDecVectorByElementCount)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEInsertGeneralRegister(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEInsertGeneralRegister)";
-
- switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
- case INSR_z_r:
- mnemonic = "insr";
- if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
- form = "'Zd.'t, 'Xn";
- } else {
- form = "'Zd.'t, 'Wn";
- }
- break;
- default:
- break;
+ const char *form = "'Zd.'t, 'Wn";
+ if (instr->GetSVESize() == kDRegSizeInBytesLog2) {
+ form = "'Zd.'t, 'Xn";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEInsertSIMDFPScalarRegister(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEInsertSIMDFPScalarRegister)";
-
- switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
- case INSR_z_v:
- mnemonic = "insr";
- form = "'Zd.'t, 'Vnv";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Vnv");
}
void Disassembler::VisitSVEIntAddSubtractImm_Unpredicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = (instr->ExtractBit(13) == 0)
? "'Zd.'t, 'Zd.'t, #'u1205"
: "'Zd.'t, 'Zd.'t, #'u1205, lsl #8";
-
- switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
- case ADD_z_zi:
- mnemonic = "add";
- break;
- case SQADD_z_zi:
- mnemonic = "sqadd";
- break;
- case SQSUB_z_zi:
- mnemonic = "sqsub";
- break;
- case SUBR_z_zi:
- mnemonic = "subr";
- break;
- case SUB_z_zi:
- mnemonic = "sub";
- break;
- case UQADD_z_zi:
- mnemonic = "uqadd";
- break;
- case UQSUB_z_zi:
- mnemonic = "uqsub";
- break;
- default:
- form = "(SVEIntAddSubtractImm_Unpredicated)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEIntAddSubtractVectors_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
- case ADD_z_p_zz:
- mnemonic = "add";
- break;
- case SUBR_z_p_zz:
- mnemonic = "subr";
- break;
- case SUB_z_p_zz:
- mnemonic = "sub";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVEIntCompareScalarCountAndLimit(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form =
(instr->ExtractBit(12) == 0) ? "'Pd.'t, 'Wn, 'Wm" : "'Pd.'t, 'Xn, 'Xm";
-
- switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) {
- case WHILELE_p_p_rr:
- mnemonic = "whilele";
- break;
- case WHILELO_p_p_rr:
- mnemonic = "whilelo";
- break;
- case WHILELS_p_p_rr:
- mnemonic = "whilels";
- break;
- case WHILELT_p_p_rr:
- mnemonic = "whilelt";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEIntConvertToFP(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEIntConvertToFP)";
-
- switch (instr->Mask(SVEIntConvertToFPMask)) {
- case SCVTF_z_p_z_h2fp16:
- mnemonic = "scvtf";
- form = "'Zd.h, 'Pgl/m, 'Zn.h";
- break;
- case SCVTF_z_p_z_w2d:
- mnemonic = "scvtf";
- form = "'Zd.d, 'Pgl/m, 'Zn.s";
- break;
- case SCVTF_z_p_z_w2fp16:
- mnemonic = "scvtf";
- form = "'Zd.h, 'Pgl/m, 'Zn.s";
- break;
- case SCVTF_z_p_z_w2s:
- mnemonic = "scvtf";
- form = "'Zd.s, 'Pgl/m, 'Zn.s";
- break;
- case SCVTF_z_p_z_x2d:
- mnemonic = "scvtf";
- form = "'Zd.d, 'Pgl/m, 'Zn.d";
- break;
- case SCVTF_z_p_z_x2fp16:
- mnemonic = "scvtf";
- form = "'Zd.h, 'Pgl/m, 'Zn.d";
- break;
- case SCVTF_z_p_z_x2s:
- mnemonic = "scvtf";
- form = "'Zd.s, 'Pgl/m, 'Zn.d";
- break;
- case UCVTF_z_p_z_h2fp16:
- mnemonic = "ucvtf";
+ const char *form = NULL;
+ switch (form_hash_) {
+ case "scvtf_z_p_z_h2fp16"_h:
+ case "ucvtf_z_p_z_h2fp16"_h:
form = "'Zd.h, 'Pgl/m, 'Zn.h";
break;
- case UCVTF_z_p_z_w2d:
- mnemonic = "ucvtf";
+ case "scvtf_z_p_z_w2d"_h:
+ case "ucvtf_z_p_z_w2d"_h:
form = "'Zd.d, 'Pgl/m, 'Zn.s";
break;
- case UCVTF_z_p_z_w2fp16:
- mnemonic = "ucvtf";
+ case "scvtf_z_p_z_w2fp16"_h:
+ case "ucvtf_z_p_z_w2fp16"_h:
form = "'Zd.h, 'Pgl/m, 'Zn.s";
break;
- case UCVTF_z_p_z_w2s:
- mnemonic = "ucvtf";
+ case "scvtf_z_p_z_w2s"_h:
+ case "ucvtf_z_p_z_w2s"_h:
form = "'Zd.s, 'Pgl/m, 'Zn.s";
break;
- case UCVTF_z_p_z_x2d:
- mnemonic = "ucvtf";
+ case "scvtf_z_p_z_x2d"_h:
+ case "ucvtf_z_p_z_x2d"_h:
form = "'Zd.d, 'Pgl/m, 'Zn.d";
break;
- case UCVTF_z_p_z_x2fp16:
- mnemonic = "ucvtf";
+ case "scvtf_z_p_z_x2fp16"_h:
+ case "ucvtf_z_p_z_x2fp16"_h:
form = "'Zd.h, 'Pgl/m, 'Zn.d";
break;
- case UCVTF_z_p_z_x2s:
- mnemonic = "ucvtf";
+ case "scvtf_z_p_z_x2s"_h:
+ case "ucvtf_z_p_z_x2s"_h:
form = "'Zd.s, 'Pgl/m, 'Zn.d";
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEIntDivideVectors_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
- case SDIVR_z_p_zz:
- mnemonic = "sdivr";
- break;
- case SDIV_z_p_zz:
- mnemonic = "sdiv";
- break;
- case UDIVR_z_p_zz:
- mnemonic = "udivr";
- break;
- case UDIV_z_p_zz:
- mnemonic = "udiv";
- break;
- default:
- break;
- }
-
- switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
- case SDIVR_z_p_zz:
- case SDIV_z_p_zz:
- case UDIVR_z_p_zz:
- case UDIV_z_p_zz:
- switch (instr->GetSVESize()) {
- case kBRegSizeInBytesLog2:
- case kHRegSizeInBytesLog2:
- mnemonic = "unimplemented";
- form = "(SVEIntBinaryArithmeticPredicated)";
- break;
- case kSRegSizeInBytesLog2:
- case kDRegSizeInBytesLog2:
- // The default form works for these instructions.
- break;
- default:
- // GetSVESize() should never return other values.
- VIXL_UNREACHABLE();
- break;
- }
+ unsigned size = instr->GetSVESize();
+ if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t");
+ } else {
+ VisitUnallocated(instr);
}
-
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEIntMinMaxDifference_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
- case SABD_z_p_zz:
- mnemonic = "sabd";
- break;
- case SMAX_z_p_zz:
- mnemonic = "smax";
- break;
- case SMIN_z_p_zz:
- mnemonic = "smin";
- break;
- case UABD_z_p_zz:
- mnemonic = "uabd";
- break;
- case UMAX_z_p_zz:
- mnemonic = "umax";
- break;
- case UMIN_z_p_zz:
- mnemonic = "umin";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVEIntMinMaxImm_Unpredicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Zd.'t, #'u1205";
+ const char *form = "'Zd.'t, 'Zd.'t, #";
+ const char *suffix = "'u1205";
- switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
- case SMAX_z_zi:
- mnemonic = "smax";
- form = "'Zd.'t, 'Zd.'t, #'s1205";
- break;
- case SMIN_z_zi:
- mnemonic = "smin";
- form = "'Zd.'t, 'Zd.'t, #'s1205";
- break;
- case UMAX_z_zi:
- mnemonic = "umax";
- break;
- case UMIN_z_zi:
- mnemonic = "umin";
- break;
- default:
+ switch (form_hash_) {
+ case "smax_z_zi"_h:
+ case "smin_z_zi"_h:
+ suffix = "'s1205";
break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEIntMulImm_Unpredicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEIntMulImm_Unpredicated)";
-
- switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
- case MUL_z_zi:
- mnemonic = "mul";
- form = "'Zd.'t, 'Zd.'t, #'s1205";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zd.'t, #'s1205");
}
void Disassembler::VisitSVEIntMulVectors_Predicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
- case MUL_z_p_zz:
- mnemonic = "mul";
- break;
- case SMULH_z_p_zz:
- mnemonic = "smulh";
- break;
- case UMULH_z_p_zz:
- mnemonic = "umulh";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "(SVELoadAndBroadcastElement)";
const char *suffix_b = ", #'u2116]";
const char *suffix_h = ", #'u2116*2]";
@@ -7321,88 +4580,52 @@ void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) {
const char *suffix_d = ", #'u2116*8]";
const char *suffix = NULL;
- switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
- case LD1RB_z_p_bi_u16:
- mnemonic = "ld1rb";
- form = "{'Zt.h}, 'Pgl/z, ['Xns";
- suffix = suffix_b;
- break;
- case LD1RB_z_p_bi_u32:
- mnemonic = "ld1rb";
- form = "{'Zt.s}, 'Pgl/z, ['Xns";
- suffix = suffix_b;
- break;
- case LD1RB_z_p_bi_u64:
- mnemonic = "ld1rb";
- form = "{'Zt.d}, 'Pgl/z, ['Xns";
- suffix = suffix_b;
- break;
- case LD1RB_z_p_bi_u8:
- mnemonic = "ld1rb";
+ switch (form_hash_) {
+ case "ld1rb_z_p_bi_u8"_h:
form = "{'Zt.b}, 'Pgl/z, ['Xns";
suffix = suffix_b;
break;
- case LD1RD_z_p_bi_u64:
- mnemonic = "ld1rd";
- form = "{'Zt.d}, 'Pgl/z, ['Xns";
- suffix = suffix_d;
- break;
- case LD1RH_z_p_bi_u16:
- mnemonic = "ld1rh";
- form = "{'Zt.h}, 'Pgl/z, ['Xns";
- suffix = suffix_h;
- break;
- case LD1RH_z_p_bi_u32:
- mnemonic = "ld1rh";
- form = "{'Zt.s}, 'Pgl/z, ['Xns";
- suffix = suffix_h;
- break;
- case LD1RH_z_p_bi_u64:
- mnemonic = "ld1rh";
- form = "{'Zt.d}, 'Pgl/z, ['Xns";
- suffix = suffix_h;
- break;
- case LD1RSB_z_p_bi_s16:
- mnemonic = "ld1rsb";
+ case "ld1rb_z_p_bi_u16"_h:
+ case "ld1rsb_z_p_bi_s16"_h:
form = "{'Zt.h}, 'Pgl/z, ['Xns";
suffix = suffix_b;
break;
- case LD1RSB_z_p_bi_s32:
- mnemonic = "ld1rsb";
+ case "ld1rb_z_p_bi_u32"_h:
+ case "ld1rsb_z_p_bi_s32"_h:
form = "{'Zt.s}, 'Pgl/z, ['Xns";
suffix = suffix_b;
break;
- case LD1RSB_z_p_bi_s64:
- mnemonic = "ld1rsb";
+ case "ld1rb_z_p_bi_u64"_h:
+ case "ld1rsb_z_p_bi_s64"_h:
form = "{'Zt.d}, 'Pgl/z, ['Xns";
suffix = suffix_b;
break;
- case LD1RSH_z_p_bi_s32:
- mnemonic = "ld1rsh";
- form = "{'Zt.s}, 'Pgl/z, ['Xns";
+ case "ld1rh_z_p_bi_u16"_h:
+ form = "{'Zt.h}, 'Pgl/z, ['Xns";
suffix = suffix_h;
break;
- case LD1RSH_z_p_bi_s64:
- mnemonic = "ld1rsh";
- form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ case "ld1rh_z_p_bi_u32"_h:
+ case "ld1rsh_z_p_bi_s32"_h:
+ form = "{'Zt.s}, 'Pgl/z, ['Xns";
suffix = suffix_h;
break;
- case LD1RSW_z_p_bi_s64:
- mnemonic = "ld1rsw";
+ case "ld1rh_z_p_bi_u64"_h:
+ case "ld1rsh_z_p_bi_s64"_h:
form = "{'Zt.d}, 'Pgl/z, ['Xns";
- suffix = suffix_w;
+ suffix = suffix_h;
break;
- case LD1RW_z_p_bi_u32:
- mnemonic = "ld1rw";
+ case "ld1rw_z_p_bi_u32"_h:
form = "{'Zt.s}, 'Pgl/z, ['Xns";
suffix = suffix_w;
break;
- case LD1RW_z_p_bi_u64:
- mnemonic = "ld1rw";
+ case "ld1rsw_z_p_bi_s64"_h:
+ case "ld1rw_z_p_bi_u64"_h:
form = "{'Zt.d}, 'Pgl/z, ['Xns";
suffix = suffix_w;
break;
- default:
+ case "ld1rd_z_p_bi_u64"_h:
+ form = "{'Zt.d}, 'Pgl/z, ['Xns";
+ suffix = suffix_d;
break;
}
@@ -7411,401 +4634,141 @@ void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) {
suffix = "]";
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
-void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm(
+void Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusImm)";
+ const char *form = "{'Zt.'tmsz}, 'Pgl/z, ['Xns";
+ const char *suffix = ", #'s1916*16]";
- const char *suffix =
- (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916*16]";
-
- switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) {
- case LD1RQB_z_p_bi_u8:
- mnemonic = "ld1rqb";
- form = "{'Zt.b}, 'Pgl/z, ['Xns";
- break;
- case LD1RQD_z_p_bi_u64:
- mnemonic = "ld1rqd";
- form = "{'Zt.d}, 'Pgl/z, ['Xns";
- break;
- case LD1RQH_z_p_bi_u16:
- mnemonic = "ld1rqh";
- form = "{'Zt.h}, 'Pgl/z, ['Xns";
- break;
- case LD1RQW_z_p_bi_u32:
- mnemonic = "ld1rqw";
- form = "{'Zt.s}, 'Pgl/z, ['Xns";
- break;
- default:
- suffix = NULL;
+ switch (form_hash_) {
+ case "ld1rob_z_p_bi_u8"_h:
+ case "ld1rod_z_p_bi_u64"_h:
+ case "ld1roh_z_p_bi_u16"_h:
+ case "ld1row_z_p_bi_u32"_h:
+ suffix = ", #'s1916*32]";
break;
}
- Format(instr, mnemonic, form, suffix);
+ if (instr->ExtractBits(19, 16) == 0) suffix = "]";
+
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
-void Disassembler::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar(
+void Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVELoadAndBroadcastQuadword_ScalarPlusScalar)";
+ const char *form = "{'Zt.'tmsz}, 'Pgl/z, ['Xns, ";
+ const char *suffix = "'Rm, lsl #'u2423]";
- switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) {
- case LD1RQB_z_p_br_contiguous:
- mnemonic = "ld1rqb";
- form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]";
- break;
- case LD1RQD_z_p_br_contiguous:
- mnemonic = "ld1rqd";
- form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]";
- break;
- case LD1RQH_z_p_br_contiguous:
- mnemonic = "ld1rqh";
- form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]";
- break;
- case LD1RQW_z_p_br_contiguous:
- mnemonic = "ld1rqw";
- form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]";
- break;
- default:
+ switch (form_hash_) {
+ case "ld1rqb_z_p_br_contiguous"_h:
+ case "ld1rob_z_p_br_contiguous"_h:
+ suffix = "'Rm]";
break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVELoadMultipleStructures_ScalarPlusImm)";
-
- const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]";
- const char *form_3 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns'ISveSvl]";
- const char *form_4 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
- "'Pgl/z, ['Xns'ISveSvl]";
-
- switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
- case LD2B_z_p_bi_contiguous:
- mnemonic = "ld2b";
- form = form_2;
- break;
- case LD2D_z_p_bi_contiguous:
- mnemonic = "ld2d";
- form = form_2;
- break;
- case LD2H_z_p_bi_contiguous:
- mnemonic = "ld2h";
- form = form_2;
- break;
- case LD2W_z_p_bi_contiguous:
- mnemonic = "ld2w";
- form = form_2;
- break;
- case LD3B_z_p_bi_contiguous:
- mnemonic = "ld3b";
- form = form_3;
- break;
- case LD3D_z_p_bi_contiguous:
- mnemonic = "ld3d";
- form = form_3;
- break;
- case LD3H_z_p_bi_contiguous:
- mnemonic = "ld3h";
- form = form_3;
- break;
- case LD3W_z_p_bi_contiguous:
- mnemonic = "ld3w";
+ const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}";
+ const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}";
+ const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}";
+ const char *suffix = ", 'Pgl/z, ['Xns'ISveSvl]";
+
+ switch (form_hash_) {
+ case "ld3b_z_p_bi_contiguous"_h:
+ case "ld3d_z_p_bi_contiguous"_h:
+ case "ld3h_z_p_bi_contiguous"_h:
+ case "ld3w_z_p_bi_contiguous"_h:
form = form_3;
break;
- case LD4B_z_p_bi_contiguous:
- mnemonic = "ld4b";
+ case "ld4b_z_p_bi_contiguous"_h:
+ case "ld4d_z_p_bi_contiguous"_h:
+ case "ld4h_z_p_bi_contiguous"_h:
+ case "ld4w_z_p_bi_contiguous"_h:
form = form_4;
break;
- case LD4D_z_p_bi_contiguous:
- mnemonic = "ld4d";
- form = form_4;
- break;
- case LD4H_z_p_bi_contiguous:
- mnemonic = "ld4h";
- form = form_4;
- break;
- case LD4W_z_p_bi_contiguous:
- mnemonic = "ld4w";
- form = form_4;
- break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVELoadMultipleStructures_ScalarPlusScalar)";
-
- const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]";
- const char *form_3 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl/z, ['Xns, 'Xm'NSveS]";
- const char *form_4 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
- "'Pgl/z, ['Xns, 'Xm'NSveS]";
-
- switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
- case LD2B_z_p_br_contiguous:
- mnemonic = "ld2b";
- form = form_2;
- break;
- case LD2D_z_p_br_contiguous:
- mnemonic = "ld2d";
- form = form_2;
- break;
- case LD2H_z_p_br_contiguous:
- mnemonic = "ld2h";
- form = form_2;
- break;
- case LD2W_z_p_br_contiguous:
- mnemonic = "ld2w";
- form = form_2;
- break;
- case LD3B_z_p_br_contiguous:
- mnemonic = "ld3b";
- form = form_3;
- break;
- case LD3D_z_p_br_contiguous:
- mnemonic = "ld3d";
- form = form_3;
- break;
- case LD3H_z_p_br_contiguous:
- mnemonic = "ld3h";
+ const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}";
+ const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}";
+ const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}";
+ const char *suffix = ", 'Pgl/z, ['Xns, 'Xm'NSveS]";
+
+ switch (form_hash_) {
+ case "ld3b_z_p_br_contiguous"_h:
+ case "ld3d_z_p_br_contiguous"_h:
+ case "ld3h_z_p_br_contiguous"_h:
+ case "ld3w_z_p_br_contiguous"_h:
form = form_3;
break;
- case LD3W_z_p_br_contiguous:
- mnemonic = "ld3w";
- form = form_3;
- break;
- case LD4B_z_p_br_contiguous:
- mnemonic = "ld4b";
- form = form_4;
- break;
- case LD4D_z_p_br_contiguous:
- mnemonic = "ld4d";
- form = form_4;
- break;
- case LD4H_z_p_br_contiguous:
- mnemonic = "ld4h";
- form = form_4;
- break;
- case LD4W_z_p_br_contiguous:
- mnemonic = "ld4w";
+ case "ld4b_z_p_br_contiguous"_h:
+ case "ld4d_z_p_br_contiguous"_h:
+ case "ld4h_z_p_br_contiguous"_h:
+ case "ld4w_z_p_br_contiguous"_h:
form = form_4;
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVELoadPredicateRegister(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVELoadPredicateRegister)";
-
- switch (instr->Mask(SVELoadPredicateRegisterMask)) {
- case LDR_p_bi:
- mnemonic = "ldr";
- if (instr->Mask(0x003f1c00) == 0) {
- form = "'Pd, ['Xns]";
- } else {
- form = "'Pd, ['Xns, #'s2116:1210, mul vl]";
- }
- break;
- default:
- break;
+ const char *form = "'Pd, ['Xns, #'s2116:1210, mul vl]";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Pd, ['Xns]";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVELoadVectorRegister(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVELoadVectorRegister)";
-
- switch (instr->Mask(SVELoadVectorRegisterMask)) {
- case LDR_z_bi:
- mnemonic = "ldr";
- if (instr->Mask(0x003f1c00) == 0) {
- form = "'Zd, ['Xns]";
- } else {
- form = "'Zt, ['Xns, #'s2116:1210, mul vl]";
- }
- break;
- default:
- break;
+ const char *form = "'Zt, ['Xns, #'s2116:1210, mul vl]";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Zd, ['Xns]";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEPartitionBreakCondition(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.b, p'u1310/'?04:mz, 'Pn.b";
-
- switch (instr->Mask(SVEPartitionBreakConditionMask)) {
- case BRKAS_p_p_p_z:
- mnemonic = "brkas";
- break;
- case BRKA_p_p_p:
- mnemonic = "brka";
- break;
- case BRKBS_p_p_p_z:
- mnemonic = "brkbs";
- break;
- case BRKB_p_p_p:
- mnemonic = "brkb";
- break;
- default:
- form = "(SVEPartitionBreakCondition)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/'?04:mz, 'Pn.b");
}
void Disassembler::VisitSVEPermutePredicateElements(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.'t, 'Pn.'t, 'Pm.'t";
-
- switch (instr->Mask(SVEPermutePredicateElementsMask)) {
- case TRN1_p_pp:
- mnemonic = "trn1";
- break;
- case TRN2_p_pp:
- mnemonic = "trn2";
- break;
- case UZP1_p_pp:
- mnemonic = "uzp1";
- break;
- case UZP2_p_pp:
- mnemonic = "uzp2";
- break;
- case ZIP1_p_pp:
- mnemonic = "zip1";
- break;
- case ZIP2_p_pp:
- mnemonic = "zip2";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn.'t, 'Pm.'t");
}
void Disassembler::VisitSVEPredicateFirstActive(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEPredicateFirstActive)";
-
- switch (instr->Mask(SVEPredicateFirstActiveMask)) {
- case PFIRST_p_p_p:
- mnemonic = "pfirst";
- form = "'Pd.b, 'Pn, 'Pd.b";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.b, 'Pn, 'Pd.b");
}
void Disassembler::VisitSVEPredicateReadFromFFR_Unpredicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEPredicateReadFromFFR_Unpredicated)";
-
- switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
- case RDFFR_p_f:
- mnemonic = "rdffr";
- form = "'Pd.b";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.b");
}
void Disassembler::VisitSVEPredicateTest(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEPredicateTest)";
-
- switch (instr->Mask(SVEPredicateTestMask)) {
- case PTEST_p_p:
- mnemonic = "ptest";
- form = "p'u1310, 'Pn.b";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "p'u1310, 'Pn.b");
}
void Disassembler::VisitSVEPredicateZero(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEPredicateZero)";
-
- switch (instr->Mask(SVEPredicateZeroMask)) {
- case PFALSE_p:
- mnemonic = "pfalse";
- form = "'Pd.b";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.b");
}
void Disassembler::VisitSVEPropagateBreakToNextPartition(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b";
-
- switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
- case BRKNS_p_p_pp:
- mnemonic = "brkns";
- break;
- case BRKN_p_p_pp:
- mnemonic = "brkn";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b");
}
void Disassembler::VisitSVEReversePredicateElements(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEReversePredicateElements)";
-
- switch (instr->Mask(SVEReversePredicateElementsMask)) {
- case REV_p_p:
- mnemonic = "rev";
- form = "'Pd.'t, 'Pn.'t";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn.'t");
}
void Disassembler::VisitSVEReverseVectorElements(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEReverseVectorElements)";
-
- switch (instr->Mask(SVEReverseVectorElementsMask)) {
- case REV_z_z:
- mnemonic = "rev";
- form = "'Zd.'t, 'Zn.'t";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) {
@@ -7847,7 +4810,6 @@ void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) {
void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = IncDecFormHelper(instr,
"'R20d, 'Ipc, mul #'u1916+1",
"'R20d, 'Ipc",
@@ -7857,399 +4819,113 @@ void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount(
"'Xd, 'Wd, 'Ipc",
"'Xd, 'Wd");
- switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
- case SQDECB_r_rs_sx:
- mnemonic = "sqdecb";
- form = form_sx;
- break;
- case SQDECD_r_rs_sx:
- mnemonic = "sqdecd";
- form = form_sx;
- break;
- case SQDECH_r_rs_sx:
- mnemonic = "sqdech";
+ switch (form_hash_) {
+ case "sqdecb_r_rs_sx"_h:
+ case "sqdecd_r_rs_sx"_h:
+ case "sqdech_r_rs_sx"_h:
+ case "sqdecw_r_rs_sx"_h:
+ case "sqincb_r_rs_sx"_h:
+ case "sqincd_r_rs_sx"_h:
+ case "sqinch_r_rs_sx"_h:
+ case "sqincw_r_rs_sx"_h:
form = form_sx;
break;
- case SQDECW_r_rs_sx:
- mnemonic = "sqdecw";
- form = form_sx;
- break;
- case SQINCB_r_rs_sx:
- mnemonic = "sqincb";
- form = form_sx;
- break;
- case SQINCD_r_rs_sx:
- mnemonic = "sqincd";
- form = form_sx;
- break;
- case SQINCH_r_rs_sx:
- mnemonic = "sqinch";
- form = form_sx;
- break;
- case SQINCW_r_rs_sx:
- mnemonic = "sqincw";
- form = form_sx;
- break;
- case SQDECB_r_rs_x:
- mnemonic = "sqdecb";
- break;
- case SQDECD_r_rs_x:
- mnemonic = "sqdecd";
- break;
- case SQDECH_r_rs_x:
- mnemonic = "sqdech";
- break;
- case SQDECW_r_rs_x:
- mnemonic = "sqdecw";
- break;
- case SQINCB_r_rs_x:
- mnemonic = "sqincb";
- break;
- case SQINCD_r_rs_x:
- mnemonic = "sqincd";
- break;
- case SQINCH_r_rs_x:
- mnemonic = "sqinch";
- break;
- case SQINCW_r_rs_x:
- mnemonic = "sqincw";
- break;
- case UQDECB_r_rs_uw:
- case UQDECB_r_rs_x:
- mnemonic = "uqdecb";
- break;
- case UQDECD_r_rs_uw:
- case UQDECD_r_rs_x:
- mnemonic = "uqdecd";
- break;
- case UQDECH_r_rs_uw:
- case UQDECH_r_rs_x:
- mnemonic = "uqdech";
- break;
- case UQDECW_r_rs_uw:
- case UQDECW_r_rs_x:
- mnemonic = "uqdecw";
- break;
- case UQINCB_r_rs_uw:
- case UQINCB_r_rs_x:
- mnemonic = "uqincb";
- break;
- case UQINCD_r_rs_uw:
- case UQINCD_r_rs_x:
- mnemonic = "uqincd";
- break;
- case UQINCH_r_rs_uw:
- case UQINCH_r_rs_x:
- mnemonic = "uqinch";
- break;
- case UQINCW_r_rs_uw:
- case UQINCW_r_rs_x:
- mnemonic = "uqincw";
- break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVESaturatingIncDecVectorByElementCount(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = IncDecFormHelper(instr,
"'Zd.'t, 'Ipc, mul #'u1916+1",
"'Zd.'t, 'Ipc",
"'Zd.'t");
-
- switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
- case SQDECD_z_zs:
- mnemonic = "sqdecd";
- break;
- case SQDECH_z_zs:
- mnemonic = "sqdech";
- break;
- case SQDECW_z_zs:
- mnemonic = "sqdecw";
- break;
- case SQINCD_z_zs:
- mnemonic = "sqincd";
- break;
- case SQINCH_z_zs:
- mnemonic = "sqinch";
- break;
- case SQINCW_z_zs:
- mnemonic = "sqincw";
- break;
- case UQDECD_z_zs:
- mnemonic = "uqdecd";
- break;
- case UQDECH_z_zs:
- mnemonic = "uqdech";
- break;
- case UQDECW_z_zs:
- mnemonic = "uqdecw";
- break;
- case UQINCD_z_zs:
- mnemonic = "uqincd";
- break;
- case UQINCH_z_zs:
- mnemonic = "uqinch";
- break;
- case UQINCW_z_zs:
- mnemonic = "uqincw";
- break;
- default:
- form = "(SVEElementCount)";
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEStoreMultipleStructures_ScalarPlusImm)";
-
- const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns'ISveSvl]";
- const char *form_3 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns'ISveSvl]";
- const char *form_4 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
- "'Pgl, ['Xns'ISveSvl]";
-
- switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
- case ST2B_z_p_bi_contiguous:
- mnemonic = "st2b";
- form = form_2;
- break;
- case ST2H_z_p_bi_contiguous:
- mnemonic = "st2h";
- form = form_2;
- break;
- case ST2W_z_p_bi_contiguous:
- mnemonic = "st2w";
- form = form_2;
- break;
- case ST2D_z_p_bi_contiguous:
- mnemonic = "st2d";
- form = form_2;
- break;
- case ST3B_z_p_bi_contiguous:
- mnemonic = "st3b";
- form = form_3;
- break;
- case ST3H_z_p_bi_contiguous:
- mnemonic = "st3h";
+ const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}";
+ const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}";
+ const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}";
+ const char *suffix = ", 'Pgl, ['Xns'ISveSvl]";
+
+ switch (form_hash_) {
+ case "st3b_z_p_bi_contiguous"_h:
+ case "st3h_z_p_bi_contiguous"_h:
+ case "st3w_z_p_bi_contiguous"_h:
+ case "st3d_z_p_bi_contiguous"_h:
form = form_3;
break;
- case ST3W_z_p_bi_contiguous:
- mnemonic = "st3w";
- form = form_3;
- break;
- case ST3D_z_p_bi_contiguous:
- mnemonic = "st3d";
- form = form_3;
- break;
- case ST4B_z_p_bi_contiguous:
- mnemonic = "st4b";
- form = form_4;
- break;
- case ST4H_z_p_bi_contiguous:
- mnemonic = "st4h";
+ case "st4b_z_p_bi_contiguous"_h:
+ case "st4h_z_p_bi_contiguous"_h:
+ case "st4w_z_p_bi_contiguous"_h:
+ case "st4d_z_p_bi_contiguous"_h:
form = form_4;
break;
- case ST4W_z_p_bi_contiguous:
- mnemonic = "st4w";
- form = form_4;
- break;
- case ST4D_z_p_bi_contiguous:
- mnemonic = "st4d";
- form = form_4;
- break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEStoreMultipleStructures_ScalarPlusScalar)";
-
- const char *form_2 = "{'Zt.'tmsz, 'Zt2.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]";
- const char *form_3 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}, 'Pgl, ['Xns, 'Xm'NSveS]";
- const char *form_4 =
- "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}, "
- "'Pgl, ['Xns, 'Xm'NSveS]";
-
- switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
- case ST2B_z_p_br_contiguous:
- mnemonic = "st2b";
- form = form_2;
- break;
- case ST2D_z_p_br_contiguous:
- mnemonic = "st2d";
- form = form_2;
- break;
- case ST2H_z_p_br_contiguous:
- mnemonic = "st2h";
- form = form_2;
- break;
- case ST2W_z_p_br_contiguous:
- mnemonic = "st2w";
- form = form_2;
- break;
- case ST3B_z_p_br_contiguous:
- mnemonic = "st3b";
+ const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}";
+ const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}";
+ const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}";
+ const char *suffix = ", 'Pgl, ['Xns, 'Xm'NSveS]";
+
+ switch (form_hash_) {
+ case "st3b_z_p_br_contiguous"_h:
+ case "st3d_z_p_br_contiguous"_h:
+ case "st3h_z_p_br_contiguous"_h:
+ case "st3w_z_p_br_contiguous"_h:
form = form_3;
break;
- case ST3D_z_p_br_contiguous:
- mnemonic = "st3d";
- form = form_3;
- break;
- case ST3H_z_p_br_contiguous:
- mnemonic = "st3h";
- form = form_3;
- break;
- case ST3W_z_p_br_contiguous:
- mnemonic = "st3w";
- form = form_3;
- break;
- case ST4B_z_p_br_contiguous:
- mnemonic = "st4b";
+ case "st4b_z_p_br_contiguous"_h:
+ case "st4d_z_p_br_contiguous"_h:
+ case "st4h_z_p_br_contiguous"_h:
+ case "st4w_z_p_br_contiguous"_h:
form = form_4;
break;
- case ST4D_z_p_br_contiguous:
- mnemonic = "st4d";
- form = form_4;
- break;
- case ST4H_z_p_br_contiguous:
- mnemonic = "st4h";
- form = form_4;
- break;
- case ST4W_z_p_br_contiguous:
- mnemonic = "st4w";
- form = form_4;
- break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEStorePredicateRegister(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEStorePredicateRegister)";
-
- switch (instr->Mask(SVEStorePredicateRegisterMask)) {
- case STR_p_bi:
- mnemonic = "str";
- if (instr->Mask(0x003f1c00) == 0) {
- form = "'Pd, ['Xns]";
- } else {
- form = "'Pd, ['Xns, #'s2116:1210, mul vl]";
- }
- break;
- default:
- break;
+ const char *form = "'Pd, ['Xns, #'s2116:1210, mul vl]";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Pd, ['Xns]";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEStoreVectorRegister(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEStoreVectorRegister)";
-
- switch (instr->Mask(SVEStoreVectorRegisterMask)) {
- case STR_z_bi:
- mnemonic = "str";
- if (instr->Mask(0x003f1c00) == 0) {
- form = "'Zd, ['Xns]";
- } else {
- form = "'Zt, ['Xns, #'s2116:1210, mul vl]";
- }
- break;
- default:
- break;
+ const char *form = "'Zt, ['Xns, #'s2116:1210, mul vl]";
+ if (instr->Mask(0x003f1c00) == 0) {
+ form = "'Zd, ['Xns]";
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVETableLookup(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVETableLookup)";
-
- switch (instr->Mask(SVETableLookupMask)) {
- case TBL_z_zz_1:
- mnemonic = "tbl";
- form = "'Zd.'t, {'Zn.'t}, 'Zm.'t";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, {'Zn.'t}, 'Zm.'t");
}
void Disassembler::VisitSVEUnpackPredicateElements(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.h, 'Pn.b";
-
- switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
- case PUNPKHI_p_p:
- mnemonic = "punpkhi";
- break;
- case PUNPKLO_p_p:
- mnemonic = "punpklo";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.h, 'Pn.b");
}
void Disassembler::VisitSVEUnpackVectorElements(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Zn.'th";
-
if (instr->GetSVESize() == 0) {
// The lowest lane size of the destination vector is H-sized lane.
- Format(instr, "unallocated", "(SVEUnpackVectorElements)");
- return;
- }
-
- switch (instr->Mask(SVEUnpackVectorElementsMask)) {
- case SUNPKHI_z_z:
- mnemonic = "sunpkhi";
- break;
- case SUNPKLO_z_z:
- mnemonic = "sunpklo";
- break;
- case UUNPKHI_z_z:
- mnemonic = "uunpkhi";
- break;
- case UUNPKLO_z_z:
- mnemonic = "uunpklo";
- break;
- default:
- break;
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'th");
}
- Format(instr, mnemonic, form);
}
-void Disassembler::VisitSVEVectorSplice_Destructive(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEVectorSplice_Destructive)";
-
- switch (instr->Mask(SVEVectorSplice_DestructiveMask)) {
- case SPLICE_z_p_zz_des:
- mnemonic = "splice";
- form = "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+void Disassembler::VisitSVEVectorSplice(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t");
}
void Disassembler::VisitSVEAddressGeneration(const Instruction *instr) {
@@ -8314,43 +4990,38 @@ void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) {
(instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19);
unsigned lane_size = instr->GetSVESize();
- switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
- case ASR_z_zi:
- if (tsize != 0) {
- // The tsz field must not be zero.
- mnemonic = "asr";
- form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves";
- }
- break;
- case ASR_z_zw:
- if (lane_size <= kSRegSizeInBytesLog2) {
- mnemonic = "asr";
- form = "'Zd.'t, 'Zn.'t, 'Zm.d";
- }
- break;
- case LSL_z_zi:
+ const char *suffix = NULL;
+ const char *form_i = "'Zd.'tszs, 'Zn.'tszs, ";
+
+ switch (form_hash_) {
+ case "asr_z_zi"_h:
+ case "lsr_z_zi"_h:
+ case "sri_z_zzi"_h:
+ case "srsra_z_zi"_h:
+ case "ssra_z_zi"_h:
+ case "ursra_z_zi"_h:
+ case "usra_z_zi"_h:
if (tsize != 0) {
// The tsz field must not be zero.
- mnemonic = "lsl";
- form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSver";
- }
- break;
- case LSL_z_zw:
- if (lane_size <= kSRegSizeInBytesLog2) {
- mnemonic = "lsl";
- form = "'Zd.'t, 'Zn.'t, 'Zm.d";
+ mnemonic = mnemonic_.c_str();
+ form = form_i;
+ suffix = "'ITriSves";
}
break;
- case LSR_z_zi:
+ case "lsl_z_zi"_h:
+ case "sli_z_zzi"_h:
if (tsize != 0) {
// The tsz field must not be zero.
- mnemonic = "lsr";
- form = "'Zd.'tszs, 'Zn.'tszs, 'ITriSves";
+ mnemonic = mnemonic_.c_str();
+ form = form_i;
+ suffix = "'ITriSver";
}
break;
- case LSR_z_zw:
+ case "asr_z_zw"_h:
+ case "lsl_z_zw"_h:
+ case "lsr_z_zw"_h:
if (lane_size <= kSRegSizeInBytesLog2) {
- mnemonic = "lsr";
+ mnemonic = mnemonic_.c_str();
form = "'Zd.'t, 'Zn.'t, 'Zm.d";
}
break;
@@ -8358,842 +5029,300 @@ void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) {
break;
}
- Format(instr, mnemonic, form);
+ Format(instr, mnemonic, form, suffix);
}
void Disassembler::VisitSVEElementCount(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form =
IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd");
-
- switch (instr->Mask(SVEElementCountMask)) {
- case CNTB_r_s:
- mnemonic = "cntb";
- break;
- case CNTD_r_s:
- mnemonic = "cntd";
- break;
- case CNTH_r_s:
- mnemonic = "cnth";
- break;
- case CNTW_r_s:
- mnemonic = "cntw";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEFPAccumulatingReduction(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPAccumulatingReduction)";
-
- switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
- case FADDA_v_p_z:
- mnemonic = "fadda";
- form = "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPArithmeticUnpredicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
-
- switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
- case FADD_z_zz:
- mnemonic = "fadd";
- break;
- case FMUL_z_zz:
- mnemonic = "fmul";
- break;
- case FRECPS_z_zz:
- mnemonic = "frecps";
- break;
- case FRSQRTS_z_zz:
- mnemonic = "frsqrts";
- break;
- case FSUB_z_zz:
- mnemonic = "fsub";
- break;
- case FTSMUL_z_zz:
- mnemonic = "ftsmul";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPCompareVectors(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
-
- switch (instr->Mask(SVEFPCompareVectorsMask)) {
- case FACGE_p_p_zz:
- mnemonic = "facge";
- break;
- case FACGT_p_p_zz:
- mnemonic = "facgt";
- break;
- case FCMEQ_p_p_zz:
- mnemonic = "fcmeq";
- break;
- case FCMGE_p_p_zz:
- mnemonic = "fcmge";
- break;
- case FCMGT_p_p_zz:
- mnemonic = "fcmgt";
- break;
- case FCMNE_p_p_zz:
- mnemonic = "fcmne";
- break;
- case FCMUO_p_p_zz:
- mnemonic = "fcmuo";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPCompareWithZero(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0";
-
- switch (instr->Mask(SVEFPCompareWithZeroMask)) {
- case FCMEQ_p_p_z0:
- mnemonic = "fcmeq";
- break;
- case FCMGE_p_p_z0:
- mnemonic = "fcmge";
- break;
- case FCMGT_p_p_z0:
- mnemonic = "fcmgt";
- break;
- case FCMLE_p_p_z0:
- mnemonic = "fcmle";
- break;
- case FCMLT_p_p_z0:
- mnemonic = "fcmlt";
- break;
- case FCMNE_p_p_z0:
- mnemonic = "fcmne";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPComplexAddition(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPComplexAddition)";
-
- switch (instr->Mask(SVEFPComplexAdditionMask)) {
- case FCADD_z_p_zz:
- mnemonic = "fcadd";
- if (instr->ExtractBit(16) == 0) {
- form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #90";
- } else {
- form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #270";
- }
- break;
- default:
- break;
+ // Bit 15 is always set, so this gives 90 * 1 or 3.
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #'u1615*90";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, form);
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPComplexMulAdd(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPComplexMulAdd)";
- const char *suffix = NULL;
-
- const char *fcmla_constants[] = {"0", "90", "180", "270"};
-
- switch (instr->Mask(SVEFPComplexMulAddMask)) {
- case FCMLA_z_p_zzz:
- mnemonic = "fcmla";
- form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #";
- suffix = fcmla_constants[instr->ExtractBits(14, 13)];
- break;
- default:
- break;
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #'u1413*90";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, form);
}
- Format(instr, mnemonic, form, suffix);
}
void Disassembler::VisitSVEFPComplexMulAddIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPComplexMulAddIndex)";
-
- const char *fcmla_constants[] = {"0", "90", "180", "270"};
- const char *suffix = fcmla_constants[instr->ExtractBits(11, 10)];
-
- switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
- case FCMLA_z_zzzi_h:
- mnemonic = "fcmla";
- form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #";
- break;
- case FCMLA_z_zzzi_s:
- mnemonic = "fcmla";
- form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #";
- break;
- default:
- suffix = NULL;
+ const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2019]";
+ const char *suffix = ", #'u1110*90";
+ switch (form_hash_) {
+ case "fcmla_z_zzzi_s"_h:
+ form = "'Zd.s, 'Zn.s, z'u1916.s['u2020]";
break;
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEFPFastReduction(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'t'u0400, 'Pgl, 'Zn.'t";
-
- switch (instr->Mask(SVEFPFastReductionMask)) {
- case FADDV_v_p_z:
- mnemonic = "faddv";
- break;
- case FMAXNMV_v_p_z:
- mnemonic = "fmaxnmv";
- break;
- case FMAXV_v_p_z:
- mnemonic = "fmaxv";
- break;
- case FMINNMV_v_p_z:
- mnemonic = "fminnmv";
- break;
- case FMINV_v_p_z:
- mnemonic = "fminv";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 'Zn.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPMulIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPMulIndex)";
-
- switch (instr->Mask(SVEFPMulIndexMask)) {
- case FMUL_z_zzi_d:
- mnemonic = "fmul";
+ const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
+ switch (form_hash_) {
+ case "fmul_z_zzi_d"_h:
form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
break;
- case FMUL_z_zzi_h:
- case FMUL_z_zzi_h_i3h:
- mnemonic = "fmul";
- form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
- break;
- case FMUL_z_zzi_s:
- mnemonic = "fmul";
+ case "fmul_z_zzi_s"_h:
form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEFPMulAdd(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t";
-
- switch (instr->Mask(SVEFPMulAddMask)) {
- case FMAD_z_p_zzz:
- mnemonic = "fmad";
- break;
- case FMLA_z_p_zzz:
- mnemonic = "fmla";
- break;
- case FMLS_z_p_zzz:
- mnemonic = "fmls";
- break;
- case FMSB_z_p_zzz:
- mnemonic = "fmsb";
- break;
- case FNMAD_z_p_zzz:
- mnemonic = "fnmad";
- break;
- case FNMLA_z_p_zzz:
- mnemonic = "fnmla";
- break;
- case FNMLS_z_p_zzz:
- mnemonic = "fnmls";
- break;
- case FNMSB_z_p_zzz:
- mnemonic = "fnmsb";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEFPMulAddIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEFPMulAddIndex)";
-
- switch (instr->Mask(SVEFPMulAddIndexMask)) {
- case FMLA_z_zzzi_d:
- mnemonic = "fmla";
- form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
- break;
- case FMLA_z_zzzi_s:
- mnemonic = "fmla";
+ const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
+ switch (form_hash_) {
+ case "fmla_z_zzzi_s"_h:
+ case "fmls_z_zzzi_s"_h:
form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
break;
- case FMLS_z_zzzi_d:
- mnemonic = "fmls";
+ case "fmla_z_zzzi_d"_h:
+ case "fmls_z_zzzi_d"_h:
form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
break;
- case FMLS_z_zzzi_s:
- mnemonic = "fmls";
- form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
- break;
- case FMLA_z_zzzi_h:
- case FMLA_z_zzzi_h_i3h:
- mnemonic = "fmla";
- form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
- break;
- case FMLS_z_zzzi_h:
- case FMLS_z_zzzi_h_i3h:
- mnemonic = "fmls";
- form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
- break;
- default:
- break;
}
-
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEFPUnaryOpUnpredicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Zn.'t";
-
- switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
- case FRECPE_z_z:
- mnemonic = "frecpe";
- break;
- case FRSQRTE_z_z:
- mnemonic = "frsqrte";
- break;
- default:
- break;
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ VisitUnallocated(instr);
+ } else {
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t");
}
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEIncDecByPredicateCount(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEIncDecByPredicateCount)";
-
- switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
- case DECP_r_p_r:
- case DECP_z_p_z:
- mnemonic = "decp";
- break;
- case INCP_r_p_r:
- case INCP_z_p_z:
- mnemonic = "incp";
- break;
- case SQDECP_r_p_r_sx:
- case SQDECP_r_p_r_x:
- case SQDECP_z_p_z:
- mnemonic = "sqdecp";
- break;
- case SQINCP_r_p_r_sx:
- case SQINCP_r_p_r_x:
- case SQINCP_z_p_z:
- mnemonic = "sqincp";
- break;
- case UQDECP_r_p_r_uw:
- case UQDECP_r_p_r_x:
- case UQDECP_z_p_z:
- mnemonic = "uqdecp";
- break;
- case UQINCP_r_p_r_uw:
- case UQINCP_r_p_r_x:
- case UQINCP_z_p_z:
- mnemonic = "uqincp";
- break;
- default:
- break;
- }
-
- switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
+ const char *form = "'Zd.'t, 'Pn";
+ switch (form_hash_) {
// <Xdn>, <Pg>.<T>
- case DECP_r_p_r:
- case INCP_r_p_r:
+ case "decp_r_p_r"_h:
+ case "incp_r_p_r"_h:
form = "'Xd, 'Pn.'t";
break;
- // <Zdn>.<T>, <Pg>
- case DECP_z_p_z:
- case INCP_z_p_z:
- case SQDECP_z_p_z:
- case SQINCP_z_p_z:
- case UQDECP_z_p_z:
- case UQINCP_z_p_z:
- form = "'Zd.'t, 'Pn";
- break;
// <Xdn>, <Pg>.<T>, <Wdn>
- case SQDECP_r_p_r_sx:
- case SQINCP_r_p_r_sx:
+ case "sqdecp_r_p_r_sx"_h:
+ case "sqincp_r_p_r_sx"_h:
form = "'Xd, 'Pn.'t, 'Wd";
break;
// <Xdn>, <Pg>.<T>
- case SQDECP_r_p_r_x:
- case SQINCP_r_p_r_x:
- case UQDECP_r_p_r_x:
- case UQINCP_r_p_r_x:
+ case "sqdecp_r_p_r_x"_h:
+ case "sqincp_r_p_r_x"_h:
+ case "uqdecp_r_p_r_x"_h:
+ case "uqincp_r_p_r_x"_h:
form = "'Xd, 'Pn.'t";
break;
// <Wdn>, <Pg>.<T>
- case UQDECP_r_p_r_uw:
- case UQINCP_r_p_r_uw:
+ case "uqdecp_r_p_r_uw"_h:
+ case "uqincp_r_p_r_uw"_h:
form = "'Wd, 'Pn.'t";
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEIndexGeneration(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEIndexGeneration)";
-
+ const char *form = "'Zd.'t, #'s0905, #'s2016";
bool w_inputs =
static_cast<unsigned>(instr->GetSVESize()) <= kWRegSizeInBytesLog2;
- switch (instr->Mask(SVEIndexGenerationMask)) {
- case INDEX_z_ii:
- mnemonic = "index";
- form = "'Zd.'t, #'s0905, #'s2016";
- break;
- case INDEX_z_ir:
- mnemonic = "index";
+ switch (form_hash_) {
+ case "index_z_ir"_h:
form = w_inputs ? "'Zd.'t, #'s0905, 'Wm" : "'Zd.'t, #'s0905, 'Xm";
break;
- case INDEX_z_ri:
- mnemonic = "index";
+ case "index_z_ri"_h:
form = w_inputs ? "'Zd.'t, 'Wn, #'s2016" : "'Zd.'t, 'Xn, #'s2016";
break;
- case INDEX_z_rr:
- mnemonic = "index";
+ case "index_z_rr"_h:
form = w_inputs ? "'Zd.'t, 'Wn, 'Wm" : "'Zd.'t, 'Xn, 'Xm";
break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEIntArithmeticUnpredicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
-
- switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
- case ADD_z_zz:
- mnemonic = "add";
- break;
- case SQADD_z_zz:
- mnemonic = "sqadd";
- break;
- case SQSUB_z_zz:
- mnemonic = "sqsub";
- break;
- case SUB_z_zz:
- mnemonic = "sub";
- break;
- case UQADD_z_zz:
- mnemonic = "uqadd";
- break;
- case UQSUB_z_zz:
- mnemonic = "uqsub";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t");
}
void Disassembler::VisitSVEIntCompareSignedImm(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016";
-
- switch (instr->Mask(SVEIntCompareSignedImmMask)) {
- case CMPEQ_p_p_zi:
- mnemonic = "cmpeq";
- break;
- case CMPGE_p_p_zi:
- mnemonic = "cmpge";
- break;
- case CMPGT_p_p_zi:
- mnemonic = "cmpgt";
- break;
- case CMPLE_p_p_zi:
- mnemonic = "cmple";
- break;
- case CMPLT_p_p_zi:
- mnemonic = "cmplt";
- break;
- case CMPNE_p_p_zi:
- mnemonic = "cmpne";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016");
}
void Disassembler::VisitSVEIntCompareUnsignedImm(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014";
-
- switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
- case CMPHI_p_p_zi:
- mnemonic = "cmphi";
- break;
- case CMPHS_p_p_zi:
- mnemonic = "cmphs";
- break;
- case CMPLO_p_p_zi:
- mnemonic = "cmplo";
- break;
- case CMPLS_p_p_zi:
- mnemonic = "cmpls";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014");
}
void Disassembler::VisitSVEIntCompareVectors(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.d";
-
- switch (instr->Mask(SVEIntCompareVectorsMask)) {
- case CMPEQ_p_p_zw:
- mnemonic = "cmpeq";
- break;
- case CMPEQ_p_p_zz:
- mnemonic = "cmpeq";
- form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
- break;
- case CMPGE_p_p_zw:
- mnemonic = "cmpge";
- break;
- case CMPGE_p_p_zz:
- mnemonic = "cmpge";
- form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
- break;
- case CMPGT_p_p_zw:
- mnemonic = "cmpgt";
- break;
- case CMPGT_p_p_zz:
- mnemonic = "cmpgt";
- form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
- break;
- case CMPHI_p_p_zw:
- mnemonic = "cmphi";
- break;
- case CMPHI_p_p_zz:
- mnemonic = "cmphi";
- form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
- break;
- case CMPHS_p_p_zw:
- mnemonic = "cmphs";
- break;
- case CMPHS_p_p_zz:
- mnemonic = "cmphs";
- form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
- break;
- case CMPLE_p_p_zw:
- mnemonic = "cmple";
- break;
- case CMPLO_p_p_zw:
- mnemonic = "cmplo";
- break;
- case CMPLS_p_p_zw:
- mnemonic = "cmpls";
- break;
- case CMPLT_p_p_zw:
- mnemonic = "cmplt";
- break;
- case CMPNE_p_p_zw:
- mnemonic = "cmpne";
- break;
- case CMPNE_p_p_zz:
- mnemonic = "cmpne";
- form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
- break;
- default:
+ const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.";
+ const char *suffix = "d";
+ switch (form_hash_) {
+ case "cmpeq_p_p_zz"_h:
+ case "cmpge_p_p_zz"_h:
+ case "cmpgt_p_p_zz"_h:
+ case "cmphi_p_p_zz"_h:
+ case "cmphs_p_p_zz"_h:
+ case "cmpne_p_p_zz"_h:
+ suffix = "'t";
break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEIntMulAddPredicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEIntMulAddPredicated)";
-
- switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
- case MAD_z_p_zzz:
- mnemonic = "mad";
- form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t";
- break;
- case MLA_z_p_zzz:
- mnemonic = "mla";
- form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t";
- break;
- case MLS_z_p_zzz:
- mnemonic = "mls";
- form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t";
- break;
- case MSB_z_p_zzz:
- mnemonic = "msb";
- form = "'Zd.'t, 'Pgl/m, 'Zm.'t, 'Zn.'t";
- break;
- default:
+ const char *form = "'Zd.'t, 'Pgl/m, ";
+ const char *suffix = "'Zn.'t, 'Zm.'t";
+ switch (form_hash_) {
+ case "mad_z_p_zzz"_h:
+ case "msb_z_p_zzz"_h:
+ suffix = "'Zm.'t, 'Zn.'t";
break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEIntMulAddUnpredicated(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEIntMulAddUnpredicated)";
-
if (static_cast<unsigned>(instr->GetSVESize()) >= kSRegSizeInBytesLog2) {
- form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq";
- switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) {
- case SDOT_z_zzz:
- mnemonic = "sdot";
- break;
- case UDOT_z_zzz:
- mnemonic = "udot";
- break;
- default:
- break;
- }
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'tq, 'Zm.'tq");
+ } else {
+ VisitUnallocated(instr);
}
-
- Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEMovprfx(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEMovprfx)";
-
- if (instr->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z) {
- mnemonic = "movprfx";
- form = "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t";
- }
-
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t");
}
void Disassembler::VisitSVEIntReduction(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "'Vdv, 'Pgl, 'Zn.'t";
-
- if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
- switch (instr->Mask(SVEIntReductionLogicalMask)) {
- case ANDV_r_p_z:
- mnemonic = "andv";
- break;
- case EORV_r_p_z:
- mnemonic = "eorv";
- break;
- case ORV_r_p_z:
- mnemonic = "orv";
- break;
- default:
- break;
- }
- } else {
- switch (instr->Mask(SVEIntReductionMask)) {
- case SADDV_r_p_z:
- mnemonic = "saddv";
- form = "'Dd, 'Pgl, 'Zn.'t";
- break;
- case SMAXV_r_p_z:
- mnemonic = "smaxv";
- break;
- case SMINV_r_p_z:
- mnemonic = "sminv";
- break;
- case UADDV_r_p_z:
- mnemonic = "uaddv";
- form = "'Dd, 'Pgl, 'Zn.'t";
- break;
- case UMAXV_r_p_z:
- mnemonic = "umaxv";
- break;
- case UMINV_r_p_z:
- mnemonic = "uminv";
- break;
- default:
- break;
- }
+ switch (form_hash_) {
+ case "saddv_r_p_z"_h:
+ case "uaddv_r_p_z"_h:
+ form = "'Dd, 'Pgl, 'Zn.'t";
+ break;
}
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEIntUnaryArithmeticPredicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
-
- switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
- case ABS_z_p_z:
- mnemonic = "abs";
- break;
- case CLS_z_p_z:
- mnemonic = "cls";
- break;
- case CLZ_z_p_z:
- mnemonic = "clz";
- break;
- case CNOT_z_p_z:
- mnemonic = "cnot";
- break;
- case CNT_z_p_z:
- mnemonic = "cnt";
- break;
- case FABS_z_p_z:
- mnemonic = "fabs";
- break;
- case FNEG_z_p_z:
- mnemonic = "fneg";
- break;
- case NEG_z_p_z:
- mnemonic = "neg";
- break;
- case NOT_z_p_z:
- mnemonic = "not";
- break;
- case SXTB_z_p_z:
- mnemonic = "sxtb";
- break;
- case SXTH_z_p_z:
- mnemonic = "sxth";
- break;
- case SXTW_z_p_z:
- mnemonic = "sxtw";
- break;
- case UXTB_z_p_z:
- mnemonic = "uxtb";
- break;
- case UXTH_z_p_z:
- mnemonic = "uxth";
- break;
- case UXTW_z_p_z:
- mnemonic = "uxtw";
- break;
- default:
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ switch (form_hash_) {
+ case "sxtw_z_p_z"_h:
+ case "uxtw_z_p_z"_h:
+ if (vform == kFormatVnS) {
+ VisitUnallocated(instr);
+ return;
+ }
+ VIXL_FALLTHROUGH();
+ case "sxth_z_p_z"_h:
+ case "uxth_z_p_z"_h:
+ if (vform == kFormatVnH) {
+ VisitUnallocated(instr);
+ return;
+ }
+ VIXL_FALLTHROUGH();
+ case "sxtb_z_p_z"_h:
+ case "uxtb_z_p_z"_h:
+ case "fabs_z_p_z"_h:
+ case "fneg_z_p_z"_h:
+ if (vform == kFormatVnB) {
+ VisitUnallocated(instr);
+ return;
+ }
break;
}
- Format(instr, mnemonic, form);
+
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t");
}
void Disassembler::VisitSVEMulIndex(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEMulIndex)";
+ const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]";
- switch (instr->Mask(SVEMulIndexMask)) {
- case SDOT_z_zzzi_d:
- mnemonic = "sdot";
- form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]";
- break;
- case SDOT_z_zzzi_s:
- mnemonic = "sdot";
- form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]";
- break;
- case UDOT_z_zzzi_d:
- mnemonic = "udot";
+ switch (form_hash_) {
+ case "sdot_z_zzzi_d"_h:
+ case "udot_z_zzzi_d"_h:
form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]";
break;
- case UDOT_z_zzzi_s:
- mnemonic = "udot";
- form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]";
- break;
- default:
- break;
}
- Format(instr, mnemonic, form);
+
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEPermuteVectorExtract(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEPermuteVectorExtract)";
-
- switch (instr->Mask(SVEPermuteVectorExtractMask)) {
- case EXT_z_zi_des:
- mnemonic = "ext";
- form = "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210");
}
void Disassembler::VisitSVEPermuteVectorInterleaving(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
-
- switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
- case TRN1_z_zz:
- mnemonic = "trn1";
- break;
- case TRN2_z_zz:
- mnemonic = "trn2";
- break;
- case UZP1_z_zz:
- mnemonic = "uzp1";
- break;
- case UZP2_z_zz:
- mnemonic = "uzp2";
- break;
- case ZIP1_z_zz:
- mnemonic = "zip1";
- break;
- case ZIP2_z_zz:
- mnemonic = "zip2";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t");
}
void Disassembler::VisitSVEPredicateCount(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEPredicateCount)";
-
- switch (instr->Mask(SVEPredicateCountMask)) {
- case CNTP_r_p_p:
- mnemonic = "cntp";
- form = "'Xd, p'u1310, 'Pn.'t";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Xd, p'u1310, 'Pn.'t");
}
void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
+ const char *mnemonic = mnemonic_.c_str();
const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b";
int pd = instr->GetPd();
@@ -9201,304 +5330,127 @@ void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) {
int pm = instr->GetPm();
int pg = instr->ExtractBits(13, 10);
- switch (instr->Mask(SVEPredicateLogicalMask)) {
- case ANDS_p_p_pp_z:
- mnemonic = "ands";
+ switch (form_hash_) {
+ case "ands_p_p_pp_z"_h:
if (pn == pm) {
mnemonic = "movs";
form = "'Pd.b, p'u1310/z, 'Pn.b";
}
break;
- case AND_p_p_pp_z:
- mnemonic = "and";
+ case "and_p_p_pp_z"_h:
if (pn == pm) {
mnemonic = "mov";
form = "'Pd.b, p'u1310/z, 'Pn.b";
}
break;
- case BICS_p_p_pp_z:
- mnemonic = "bics";
- break;
- case BIC_p_p_pp_z:
- mnemonic = "bic";
- break;
- case EORS_p_p_pp_z:
- mnemonic = "eors";
+ case "eors_p_p_pp_z"_h:
if (pm == pg) {
mnemonic = "nots";
form = "'Pd.b, 'Pm/z, 'Pn.b";
}
break;
- case EOR_p_p_pp_z:
- mnemonic = "eor";
+ case "eor_p_p_pp_z"_h:
if (pm == pg) {
mnemonic = "not";
form = "'Pd.b, 'Pm/z, 'Pn.b";
}
break;
- case NANDS_p_p_pp_z:
- mnemonic = "nands";
- break;
- case NAND_p_p_pp_z:
- mnemonic = "nand";
- break;
- case NORS_p_p_pp_z:
- mnemonic = "nors";
- break;
- case NOR_p_p_pp_z:
- mnemonic = "nor";
- break;
- case ORNS_p_p_pp_z:
- mnemonic = "orns";
- break;
- case ORN_p_p_pp_z:
- mnemonic = "orn";
- break;
- case ORRS_p_p_pp_z:
- mnemonic = "orrs";
+ case "orrs_p_p_pp_z"_h:
if ((pn == pm) && (pn == pg)) {
mnemonic = "movs";
form = "'Pd.b, 'Pn.b";
}
break;
- case ORR_p_p_pp_z:
- mnemonic = "orr";
+ case "orr_p_p_pp_z"_h:
if ((pn == pm) && (pn == pg)) {
mnemonic = "mov";
form = "'Pd.b, 'Pn.b";
}
break;
- case SEL_p_p_pp:
+ case "sel_p_p_pp"_h:
if (pd == pm) {
mnemonic = "mov";
form = "'Pd.b, p'u1310/m, 'Pn.b";
} else {
- mnemonic = "sel";
form = "'Pd.b, p'u1310, 'Pn.b, 'Pm.b";
}
break;
- default:
- form = "(SVEPredicateLogical)";
- break;
}
Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEPredicateInitialize(const Instruction *instr) {
- // This group only contains PTRUE{S}, and there are no unallocated encodings.
- VIXL_STATIC_ASSERT(
- SVEPredicateInitializeMask ==
- (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
- VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
- (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
-
- const char *mnemonic = instr->ExtractBit(16) ? "ptrues" : "ptrue";
const char *form = "'Pd.'t, 'Ipc";
// Omit the pattern if it is the default ('ALL').
if (instr->ExtractBits(9, 5) == SVE_ALL) form = "'Pd.'t";
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitSVEPredicateNextActive(const Instruction *instr) {
- // This group only contains PNEXT, and there are no unallocated encodings.
- VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
- VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
-
- Format(instr, "pnext", "'Pd.'t, 'Pn, 'Pd.'t");
+ FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn, 'Pd.'t");
}
void Disassembler::VisitSVEPredicateReadFromFFR_Predicated(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEPredicateReadFromFFR_Predicated)";
- switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
- case RDFFR_p_p_f:
- case RDFFRS_p_p_f:
- mnemonic = instr->ExtractBit(22) ? "rdffrs" : "rdffr";
- form = "'Pd.b, 'Pn/z";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.b, 'Pn/z");
}
void Disassembler::VisitSVEPropagateBreak(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b";
-
- switch (instr->Mask(SVEPropagateBreakMask)) {
- case BRKPAS_p_p_pp:
- mnemonic = "brkpas";
- break;
- case BRKPA_p_p_pp:
- mnemonic = "brkpa";
- break;
- case BRKPBS_p_p_pp:
- mnemonic = "brkpbs";
- break;
- case BRKPB_p_p_pp:
- mnemonic = "brkpb";
- break;
- default:
- break;
- }
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b");
}
void Disassembler::VisitSVEStackFrameAdjustment(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "'Xds, 'Xms, #'s1005";
-
- switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
- case ADDPL_r_ri:
- mnemonic = "addpl";
- break;
- case ADDVL_r_ri:
- mnemonic = "addvl";
- break;
- default:
- form = "(SVEStackFrameAdjustment)";
- break;
- }
-
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Xds, 'Xms, #'s1005");
}
void Disassembler::VisitSVEStackFrameSize(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEStackFrameSize)";
-
- switch (instr->Mask(SVEStackFrameSizeMask)) {
- case RDVL_r_i:
- mnemonic = "rdvl";
- form = "'Xd, #'s1005";
- break;
- default:
- break;
- }
-
- Format(instr, mnemonic, form);
+ FormatWithDecodedMnemonic(instr, "'Xd, #'s1005");
}
void Disassembler::VisitSVEVectorSelect(const Instruction *instr) {
- const char *mnemonic = "unimplemented";
- const char *form = "(SVEVectorSelect)";
+ const char *mnemonic = mnemonic_.c_str();
+ const char *form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t";
- switch (instr->Mask(SVEVectorSelectMask)) {
- case SEL_z_p_zz:
- if (instr->GetRd() == instr->GetRm()) {
- mnemonic = "mov";
- form = "'Zd.'t, p'u1310/m, 'Zn.'t";
- } else {
- mnemonic = "sel";
- form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t";
- }
- break;
- default:
- break;
+ if (instr->GetRd() == instr->GetRm()) {
+ mnemonic = "mov";
+ form = "'Zd.'t, p'u1310/m, 'Zn.'t";
}
+
Format(instr, mnemonic, form);
}
void Disassembler::VisitSVEContiguousLoad_ScalarPlusImm(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns";
const char *suffix =
(instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]";
-
- switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
- case LD1B_z_p_bi_u16:
- case LD1B_z_p_bi_u32:
- case LD1B_z_p_bi_u64:
- case LD1B_z_p_bi_u8:
- mnemonic = "ld1b";
- break;
- case LD1D_z_p_bi_u64:
- mnemonic = "ld1d";
- break;
- case LD1H_z_p_bi_u16:
- case LD1H_z_p_bi_u32:
- case LD1H_z_p_bi_u64:
- mnemonic = "ld1h";
- break;
- case LD1SB_z_p_bi_s16:
- case LD1SB_z_p_bi_s32:
- case LD1SB_z_p_bi_s64:
- mnemonic = "ld1sb";
- break;
- case LD1SH_z_p_bi_s32:
- case LD1SH_z_p_bi_s64:
- mnemonic = "ld1sh";
- break;
- case LD1SW_z_p_bi_s64:
- mnemonic = "ld1sw";
- break;
- case LD1W_z_p_bi_u32:
- case LD1W_z_p_bi_u64:
- mnemonic = "ld1w";
- break;
- default:
- form = "(SVEContiguousLoad_ScalarPlusImm)";
- suffix = NULL;
- break;
- }
-
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitSVEContiguousLoad_ScalarPlusScalar(
const Instruction *instr) {
- const char *mnemonic = "unimplemented";
const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns, 'Xm";
- const char *suffix = NULL;
-
- switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
- case LD1B_z_p_br_u16:
- case LD1B_z_p_br_u32:
- case LD1B_z_p_br_u64:
- case LD1B_z_p_br_u8:
- mnemonic = "ld1b";
- suffix = "]";
- break;
- case LD1D_z_p_br_u64:
- mnemonic = "ld1d";
- suffix = ", lsl #'u2423]";
- break;
- case LD1H_z_p_br_u16:
- case LD1H_z_p_br_u32:
- case LD1H_z_p_br_u64:
- mnemonic = "ld1h";
+ const char *suffix = "]";
+
+ switch (form_hash_) {
+ case "ld1h_z_p_br_u16"_h:
+ case "ld1h_z_p_br_u32"_h:
+ case "ld1h_z_p_br_u64"_h:
+ case "ld1w_z_p_br_u32"_h:
+ case "ld1w_z_p_br_u64"_h:
+ case "ld1d_z_p_br_u64"_h:
suffix = ", lsl #'u2423]";
break;
- case LD1SB_z_p_br_s16:
- case LD1SB_z_p_br_s32:
- case LD1SB_z_p_br_s64:
- mnemonic = "ld1sb";
- suffix = "]";
- break;
- case LD1SH_z_p_br_s32:
- case LD1SH_z_p_br_s64:
- mnemonic = "ld1sh";
+ case "ld1sh_z_p_br_s32"_h:
+ case "ld1sh_z_p_br_s64"_h:
suffix = ", lsl #1]";
break;
- case LD1SW_z_p_br_s64:
- mnemonic = "ld1sw";
+ case "ld1sw_z_p_br_s64"_h:
suffix = ", lsl #2]";
break;
- case LD1W_z_p_br_u32:
- case LD1W_z_p_br_u64:
- mnemonic = "ld1w";
- suffix = ", lsl #'u2423]";
- break;
- default:
- form = "(SVEContiguousLoad_ScalarPlusScalar)";
- suffix = NULL;
- break;
}
- Format(instr, mnemonic, form, suffix);
+ FormatWithDecodedMnemonic(instr, form, suffix);
}
void Disassembler::VisitReserved(const Instruction *instr) {
@@ -9507,7 +5459,6 @@ void Disassembler::VisitReserved(const Instruction *instr) {
Format(instr, "udf", "'IUdf");
}
-
void Disassembler::VisitUnimplemented(const Instruction *instr) {
Format(instr, "unimplemented", "(Unimplemented)");
}
@@ -9517,6 +5468,403 @@ void Disassembler::VisitUnallocated(const Instruction *instr) {
Format(instr, "unallocated", "(Unallocated)");
}
+void Disassembler::Visit(Metadata *metadata, const Instruction *instr) {
+ VIXL_ASSERT(metadata->count("form") > 0);
+ const std::string &form = (*metadata)["form"];
+ form_hash_ = Hash(form.c_str());
+ const FormToVisitorFnMap *fv = Disassembler::GetFormToVisitorFnMap();
+ FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
+ if (it == fv->end()) {
+ VisitUnimplemented(instr);
+ } else {
+ SetMnemonicFromForm(form);
+ (it->second)(this, instr);
+ }
+}
+
+void Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction *instr) {
+ const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
+ Format(instr, "unimplemented", "(PdT_PgZ_ZnT_ZmT)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction *instr) {
+ const char *form = "'Zd.b, {'Zn.b, 'Zn2.b}, #'u2016:1210";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdB_ZnB_ZmB(const Instruction *instr) {
+ const char *form = "'Zd.b, 'Zn.b, 'Zm.b";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ Format(instr, mnemonic_.c_str(), form);
+ } else {
+ Format(instr, "unimplemented", "(ZdB_ZnB_ZmB)");
+ }
+}
+
+void Disassembler::Disassemble_ZdD_PgM_ZnS(const Instruction *instr) {
+ const char *form = "'Zd.d, 'Pgl/m, 'Zn.s";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdD_ZnD_ZmD(const Instruction *instr) {
+ const char *form = "'Zd.d, 'Zn.d, 'Zm.d";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdD_ZnD_ZmD_imm(const Instruction *instr) {
+ const char *form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdD_ZnS_ZmS_imm(const Instruction *instr) {
+ const char *form = "'Zd.d, 'Zn.s, z'u1916.s['u2020:1111]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdH_PgM_ZnS(const Instruction *instr) {
+ const char *form = "'Zd.h, 'Pgl/m, 'Zn.s";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdH_ZnH_ZmH_imm(const Instruction *instr) {
+ const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdS_PgM_ZnD(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Pgl/m, 'Zn.d";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdS_PgM_ZnH(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Pgl/m, 'Zn.h";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdS_PgM_ZnS(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Pgl/m, 'Zn.s";
+ if (instr->GetSVEVectorFormat() == kFormatVnS) {
+ Format(instr, mnemonic_.c_str(), form);
+ } else {
+ Format(instr, "unimplemented", "(ZdS_PgM_ZnS)");
+ }
+}
+
+void Disassembler::Disassemble_ZdS_ZnH_ZmH_imm(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.h, z'u1816.h['u2019:1111]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdS_ZnS_ZmS(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.s, 'Zm.s";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdS_ZnS_ZmS_imm(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::DisassembleSVEFlogb(const Instruction *instr) {
+ const char *form = "'Zd.'tf, 'Pgl/m, 'Zn.'tf";
+ if (instr->GetSVEVectorFormat(17) == kFormatVnB) {
+ Format(instr, "unimplemented", "(SVEFlogb)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::Disassemble_ZdT_PgM_ZnT(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t";
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
+ Format(instr, mnemonic_.c_str(), form);
+ } else {
+ Format(instr, "unimplemented", "(ZdT_PgZ_ZnT_ZmT)");
+ }
+}
+
+void Disassembler::Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Pgl, {'Zn.'t, 'Zn2.'t}";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction *instr) {
+ const char *form = "'Zd.'t, {'Zn.'t, 'Zn2.'t}, 'Zm.'t";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdT_ZnT_ZmT(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdT_ZnT_ZmTb(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'th";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ Format(instr, "unimplemented", "(ZdT_ZnT_ZmTb)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::Disassemble_ZdT_ZnTb(const Instruction *instr) {
+ const char *form = "'Zd.'tszs, 'Zn.'tszd";
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+ int shift_dist = shift_and_lane_size.first;
+ int lane_size = shift_and_lane_size.second;
+ // Convert shift_dist from a right to left shift. Valid xtn instructions
+ // must have a left shift_dist equivalent of zero.
+ shift_dist = (8 << lane_size) - shift_dist;
+ if ((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
+ (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)) &&
+ (shift_dist == 0)) {
+ Format(instr, mnemonic_.c_str(), form);
+ } else {
+ Format(instr, "unimplemented", "(ZdT_ZnTb)");
+ }
+}
+
+void Disassembler::Disassemble_ZdT_ZnTb_ZmTb(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ // TODO: This is correct for saddlbt, ssublbt, subltb, which don't have
+ // b-lane sized form, and for pmull[b|t] as feature `SVEPmull128` isn't
+ // supported, but may need changes for other instructions reaching here.
+ Format(instr, "unimplemented", "(ZdT_ZnTb_ZmTb)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::DisassembleSVEAddSubHigh(const Instruction *instr) {
+ const char *form = "'Zd.'th, 'Zn.'t, 'Zm.'t";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ Format(instr, "unimplemented", "(SVEAddSubHigh)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::DisassembleSVEShiftLeftImm(const Instruction *instr) {
+ const char *form = "'Zd.'tszd, 'Zn.'tszs, 'ITriSver";
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+ int lane_size = shift_and_lane_size.second;
+ if ((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
+ (lane_size <= static_cast<int>(kSRegSizeInBytesLog2))) {
+ Format(instr, mnemonic_.c_str(), form);
+ } else {
+ Format(instr, "unimplemented", "(SVEShiftLeftImm)");
+ }
+}
+
+void Disassembler::DisassembleSVEShiftRightImm(const Instruction *instr) {
+ const char *form = "'Zd.'tszs, 'Zn.'tszd, 'ITriSves";
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+ int lane_size = shift_and_lane_size.second;
+ if ((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
+ (lane_size <= static_cast<int>(kSRegSizeInBytesLog2))) {
+ Format(instr, mnemonic_.c_str(), form);
+ } else {
+ Format(instr, "unimplemented", "(SVEShiftRightImm)");
+ }
+}
+
+void Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction *instr) {
+ const char *form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const(
+ const Instruction *instr) {
+ const char *form = "'Zd.d, 'Zn.h, z'u1916.h['u2020], #'u1110*90";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction *instr) {
+ const char *form = "'Zd.d, 'Zn.s, z'u1916.s['u2020:1111]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction *instr) {
+ const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const(
+ const Instruction *instr) {
+ const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #'u1110*90";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaS_ZnB_ZmB(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.b, 'Zm.b";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const(
+ const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019], #'u1110*90";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaS_ZnH_ZmH(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.h, 'Zm.h";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.h, z'u1816.h['u2019:1111]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const(
+ const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #'u1110*90";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaT_PgM_ZnTb(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'th";
+
+ if (instr->GetSVESize() == 0) {
+ // The lowest lane size of the destination vector is H-sized lane.
+ Format(instr, "unimplemented", "(Disassemble_ZdaT_PgM_ZnTb)");
+ return;
+ }
+
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::DisassembleSVEAddSubCarry(const Instruction *instr) {
+ const char *form = "'Zd.'?22:ds, 'Zn.'?22:ds, 'Zm.'?22:ds";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaT_ZnT_ZmT(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaT_ZnT_ZmT_const(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t, #'u1110*90";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdaT_ZnTb_ZmTb(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ Format(instr, "unimplemented", "(ZdaT_ZnTb_ZmTb)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq, #'u1110*90";
+ VectorFormat vform = instr->GetSVEVectorFormat();
+
+ if ((vform == kFormatVnB) || (vform == kFormatVnH)) {
+ Format(instr, "unimplemented", "(ZdaT_ZnTb_ZmTb_const)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::Disassemble_ZdnB_ZdnB(const Instruction *instr) {
+ const char *form = "'Zd.b, 'Zd.b";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdnB_ZdnB_ZmB(const Instruction *instr) {
+ const char *form = "'Zd.b, 'Zd.b, 'Zn.b";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::DisassembleSVEBitwiseTernary(const Instruction *instr) {
+ const char *form = "'Zd.d, 'Zd.d, 'Zm.d, 'Zn.d";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::Disassemble_ZdnS_ZdnS_ZmS(const Instruction *instr) {
+ const char *form = "'Zd.s, 'Zd.s, 'Zn.s";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::DisassembleSVEFPPair(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+ if (instr->GetSVEVectorFormat() == kFormatVnB) {
+ Format(instr, "unimplemented", "(SVEFPPair)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t";
+ Format(instr, mnemonic_.c_str(), form);
+}
+
+void Disassembler::DisassembleSVEComplexIntAddition(const Instruction *instr) {
+ const char *form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #";
+ const char *suffix = (instr->ExtractBit(10) == 0) ? "90" : "270";
+ Format(instr, mnemonic_.c_str(), form, suffix);
+}
+
+void Disassembler::Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction *instr) {
+ const char *form = "'Zd.'tszs, 'Zd.'tszs, 'Zn.'tszs, 'ITriSves";
+ unsigned tsize =
+ (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19);
+
+ if (tsize == 0) {
+ Format(instr, "unimplemented", "(ZdnT_ZdnT_ZmT_const)");
+ } else {
+ Format(instr, mnemonic_.c_str(), form);
+ }
+}
+
+void Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction *instr) {
+ const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d";
+ const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]";
+ Format(instr, mnemonic_.c_str(), form, suffix);
+}
+
+void Disassembler::Disassemble_ZtD_Pg_ZnD_Xm(const Instruction *instr) {
+ const char *form = "{'Zt.d}, 'Pgl, ['Zn.d";
+ const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]";
+ Format(instr, mnemonic_.c_str(), form, suffix);
+}
+
+void Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction *instr) {
+ const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s";
+ const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]";
+ Format(instr, mnemonic_.c_str(), form, suffix);
+}
+
+void Disassembler::Disassemble_ZtS_Pg_ZnS_Xm(const Instruction *instr) {
+ const char *form = "{'Zt.s}, 'Pgl, ['Zn.s";
+ const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]";
+ Format(instr, mnemonic_.c_str(), form, suffix);
+}
void Disassembler::ProcessOutput(const Instruction * /*instr*/) {
// The base disasm does nothing more than disassembling into a buffer.
@@ -9635,22 +5983,31 @@ void Disassembler::Format(const Instruction *instr,
const char *mnemonic,
const char *format0,
const char *format1) {
- VIXL_ASSERT(mnemonic != NULL);
- ResetOutput();
- Substitute(instr, mnemonic);
- if (format0 != NULL) {
- VIXL_ASSERT(buffer_pos_ < buffer_size_);
- buffer_[buffer_pos_++] = ' ';
- Substitute(instr, format0);
- if (format1 != NULL) {
- Substitute(instr, format1);
+ if ((mnemonic == NULL) || (format0 == NULL)) {
+ VisitUnallocated(instr);
+ } else {
+ ResetOutput();
+ Substitute(instr, mnemonic);
+ if (format0[0] != 0) { // Not a zero-length string.
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_++] = ' ';
+ Substitute(instr, format0);
+ // TODO: consider using a zero-length string here, too.
+ if (format1 != NULL) {
+ Substitute(instr, format1);
+ }
}
+ VIXL_ASSERT(buffer_pos_ < buffer_size_);
+ buffer_[buffer_pos_] = 0;
+ ProcessOutput(instr);
}
- VIXL_ASSERT(buffer_pos_ < buffer_size_);
- buffer_[buffer_pos_] = 0;
- ProcessOutput(instr);
}
+void Disassembler::FormatWithDecodedMnemonic(const Instruction *instr,
+ const char *format0,
+ const char *format1) {
+ Format(instr, mnemonic_.c_str(), format0, format1);
+}
void Disassembler::Substitute(const Instruction *instr, const char *string) {
char chr = *string++;
@@ -9759,8 +6116,8 @@ std::pair<unsigned, unsigned> Disassembler::GetRegNumForField(
case '2':
case '3':
case '4':
- if ((reg_prefix == 'V') || (reg_prefix == 'Z')) { // Vt2/3/4, Zt2/3/4
- VIXL_ASSERT(field[0] == 't');
+ if ((reg_prefix == 'V') || (reg_prefix == 'Z')) { // t2/3/4, n2/3/4
+ VIXL_ASSERT((field[0] == 't') || (field[0] == 'n'));
reg_num = (reg_num + field[1] - '1') % 32;
field_len++;
} else {
@@ -10000,10 +6357,6 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
}
return 3;
}
- case 'F': { // ILF(CNR) - Immediate Rotation Value for Complex Numbers
- AppendToOutput("#%" PRId32, instr->GetImmRotFcmlaSca() * 90);
- return strlen("ILFCNR");
- }
case 'A': { // ILA - Immediate Load with pointer authentication.
if (instr->GetImmLSPAC() != 0) {
AppendToOutput(", #%" PRId32, instr->GetImmLSPAC());
@@ -10084,7 +6437,7 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
return 8;
}
case 'r': {
- // SVE unpredicated shift immediate encoding, lsl.
+ // SVE unpredicated shift immediate encoding, left shifts.
std::pair<int, int> shift_and_lane_size =
instr->GetSVEImmShiftAndLaneSizeLog2(
/* is_predicated = */ false);
@@ -10093,7 +6446,7 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
return 8;
}
case 's': {
- // SVE unpredicated shift immediate encoding, asr and lsr.
+ // SVE unpredicated shift immediate encoding, right shifts.
std::pair<int, int> shift_and_lane_size =
instr->GetSVEImmShiftAndLaneSizeLog2(
/* is_predicated = */ false);
@@ -10146,13 +6499,13 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
}
case 's': { // Is - Shift (immediate).
switch (format[2]) {
- case '1': { // Is1 - SSHR.
+ case 'R': { // IsR - right shifts.
int shift = 16 << HighestSetBitPosition(instr->GetImmNEONImmh());
shift -= instr->GetImmNEONImmhImmb();
AppendToOutput("#%d", shift);
return 3;
}
- case '2': { // Is2 - SLI.
+ case 'L': { // IsL - left shifts.
int shift = instr->GetImmNEONImmhImmb();
shift -= 8 << HighestSetBitPosition(instr->GetImmNEONImmh());
AppendToOutput("#%d", shift);
@@ -10174,43 +6527,35 @@ int Disassembler::SubstituteImmediateField(const Instruction *instr,
}
case 'V': { // Immediate Vector.
switch (format[2]) {
- case 'F': {
- switch (format[5]) {
- // Convert 'rot' bit encodings into equivalent angle rotation
- case 'A':
- AppendToOutput("#%" PRId32,
- instr->GetImmRotFcadd() == 1 ? 270 : 90);
- break;
- case 'M':
- AppendToOutput("#%" PRId32, instr->GetImmRotFcmlaVec() * 90);
- break;
- }
- return strlen("IVFCN") + 1;
- }
case 'E': { // IVExtract.
AppendToOutput("#%" PRId32, instr->GetImmNEONExt());
return 9;
}
case 'B': { // IVByElemIndex.
int ret = strlen("IVByElemIndex");
- int vm_index = (instr->GetNEONH() << 1) | instr->GetNEONL();
+ uint32_t vm_index = instr->GetNEONH() << 2;
+ vm_index |= instr->GetNEONL() << 1;
+ vm_index |= instr->GetNEONM();
+
static const char *format_rot = "IVByElemIndexRot";
static const char *format_fhm = "IVByElemIndexFHM";
- bool is_fhm = strncmp(format, format_fhm, strlen(format_fhm)) == 0;
if (strncmp(format, format_rot, strlen(format_rot)) == 0) {
// FCMLA uses 'H' bit index when SIZE is 2, else H:L
+ VIXL_ASSERT((instr->GetNEONSize() == 1) ||
+ (instr->GetNEONSize() == 2));
+ vm_index >>= instr->GetNEONSize();
+ ret = static_cast<int>(strlen(format_rot));
+ } else if (strncmp(format, format_fhm, strlen(format_fhm)) == 0) {
+ // Nothing to do - FMLAL and FMLSL use H:L:M.
+ ret = static_cast<int>(strlen(format_fhm));
+ } else {
if (instr->GetNEONSize() == 2) {
- vm_index = instr->GetNEONH();
+ // S-sized elements use H:L.
+ vm_index >>= 1;
+ } else if (instr->GetNEONSize() == 3) {
+ // D-sized elements use H.
+ vm_index >>= 2;
}
- ret = static_cast<int>(strlen(format_rot));
- } else if (is_fhm || (instr->GetNEONSize() == 0)) {
- // Half-precision FP ops use H:L:M bit index
- // Widening operations with H-sized operands also use H:L:M.
- vm_index = (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) |
- instr->GetNEONM();
- if (is_fhm) ret = static_cast<int>(strlen(format_fhm));
- } else if (instr->GetNEONSize() == 1) {
- vm_index = (vm_index << 1) | instr->GetNEONM();
}
AppendToOutput("%d", vm_index);
return ret;
@@ -10660,10 +7005,10 @@ int Disassembler::SubstitutePrefetchField(const Instruction *instr,
int placeholder_length = is_sve ? 9 : 6;
static const char *stream_options[] = {"keep", "strm"};
- auto get_hints = [](bool is_sve) -> std::vector<std::string> {
+ auto get_hints = [](bool want_sve_hint) -> std::vector<std::string> {
static const std::vector<std::string> sve_hints = {"ld", "st"};
static const std::vector<std::string> core_hints = {"ld", "li", "st"};
- return (is_sve) ? sve_hints : core_hints;
+ return (want_sve_hint) ? sve_hints : core_hints;
};
std::vector<std::string> hints = get_hints(is_sve);
@@ -10807,12 +7152,13 @@ int Disassembler::SubstituteSVESize(const Instruction *instr,
VIXL_ASSERT(format[0] == 't');
static const char sizes[] = {'b', 'h', 's', 'd', 'q'};
- // TODO: only the most common case for <size> is supported at the moment,
- // and even then, the RESERVED values are handled as if they're not
- // reserved.
unsigned size_in_bytes_log2 = instr->GetSVESize();
int placeholder_length = 1;
switch (format[1]) {
+ case 'f': // 'tf - FP size encoded in <18:17>
+ placeholder_length++;
+ size_in_bytes_log2 = instr->ExtractBits(18, 17);
+ break;
case 'l':
placeholder_length++;
if (format[2] == 's') {
@@ -10835,27 +7181,25 @@ int Disassembler::SubstituteSVESize(const Instruction *instr,
placeholder_length += 3;
size_in_bytes_log2 = instr->ExtractBits(24, 23);
break;
+ case 'i': { // 'ti: indices.
+ std::pair<int, int> index_and_lane_size =
+ instr->GetSVEPermuteIndexAndLaneSizeLog2();
+ placeholder_length++;
+ size_in_bytes_log2 = index_and_lane_size.second;
+ break;
+ }
case 's':
if (format[2] == 'z') {
- VIXL_ASSERT((format[3] == 'x') || (format[3] == 's') ||
- (format[3] == 'p'));
- if (format[3] == 'x') {
- // 'tszx: Indexes.
- std::pair<int, int> index_and_lane_size =
- instr->GetSVEPermuteIndexAndLaneSizeLog2();
- size_in_bytes_log2 = index_and_lane_size.second;
- } else if (format[3] == 'p') {
- // 'tszp: Predicated shifts.
- std::pair<int, int> shift_and_lane_size =
- instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
- size_in_bytes_log2 = shift_and_lane_size.second;
- } else {
- // 'tszs: Unpredicated shifts.
- std::pair<int, int> shift_and_lane_size =
- instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
- size_in_bytes_log2 = shift_and_lane_size.second;
+ VIXL_ASSERT((format[3] == 'p') || (format[3] == 's') ||
+ (format[3] == 'd'));
+ bool is_predicated = (format[3] == 'p');
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(is_predicated);
+ size_in_bytes_log2 = shift_and_lane_size.second;
+ if (format[3] == 'd') { // Double size lanes.
+ size_in_bytes_log2++;
}
- placeholder_length += 3; // skip `sz[x|s]`
+ placeholder_length += 3; // skip "sz(p|s|d)"
}
break;
case 'h':
diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h
index 176fb790..aa5348dc 100644
--- a/src/aarch64/disasm-aarch64.h
+++ b/src/aarch64/disasm-aarch64.h
@@ -27,6 +27,8 @@
#ifndef VIXL_AARCH64_DISASM_AARCH64_H
#define VIXL_AARCH64_DISASM_AARCH64_H
+#include <functional>
+#include <unordered_map>
#include <utility>
#include "../globals-vixl.h"
@@ -34,6 +36,7 @@
#include "cpu-features-auditor-aarch64.h"
#include "decoder-aarch64.h"
+#include "decoder-visitor-map-aarch64.h"
#include "instructions-aarch64.h"
#include "operands-aarch64.h"
@@ -47,11 +50,9 @@ class Disassembler : public DecoderVisitor {
virtual ~Disassembler();
char* GetOutput();
-// Declare all Visitor functions.
-#define DECLARE(A) \
- virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
- VISITOR_LIST(DECLARE)
-#undef DECLARE
+ // Declare all Visitor functions.
+ virtual void Visit(Metadata* metadata,
+ const Instruction* instr) VIXL_OVERRIDE;
protected:
virtual void ProcessOutput(const Instruction* instr);
@@ -112,10 +113,126 @@ class Disassembler : public DecoderVisitor {
int64_t CodeRelativeAddress(const void* instr);
private:
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
+ VISITOR_LIST(DECLARE)
+#undef DECLARE
+
+ using FormToVisitorFnMap = std::unordered_map<
+ uint32_t,
+ std::function<void(Disassembler*, const Instruction*)>>;
+ static const FormToVisitorFnMap* GetFormToVisitorFnMap();
+
+ std::string mnemonic_;
+ uint32_t form_hash_;
+
+ void SetMnemonicFromForm(const std::string& form) {
+ if (form != "unallocated") {
+ VIXL_ASSERT(form.find_first_of('_') != std::string::npos);
+ mnemonic_ = form.substr(0, form.find_first_of('_'));
+ }
+ }
+
+ void Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
+ void Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
+ void Disassemble_ZdB_ZnB_ZmB(const Instruction* instr);
+ void Disassemble_ZdD_PgM_ZnS(const Instruction* instr);
+ void Disassemble_ZdD_ZnD_ZmD(const Instruction* instr);
+ void Disassemble_ZdD_ZnD_ZmD_imm(const Instruction* instr);
+ void Disassemble_ZdD_ZnS_ZmS_imm(const Instruction* instr);
+ void Disassemble_ZdH_PgM_ZnS(const Instruction* instr);
+ void Disassemble_ZdH_ZnH_ZmH_imm(const Instruction* instr);
+ void Disassemble_ZdS_PgM_ZnD(const Instruction* instr);
+ void Disassemble_ZdS_PgM_ZnH(const Instruction* instr);
+ void Disassemble_ZdS_PgM_ZnS(const Instruction* instr);
+ void Disassemble_ZdS_ZnH_ZmH_imm(const Instruction* instr);
+ void Disassemble_ZdS_ZnS_ZmS(const Instruction* instr);
+ void Disassemble_ZdS_ZnS_ZmS_imm(const Instruction* instr);
+ void Disassemble_ZdT_PgM_ZnT(const Instruction* instr);
+ void Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
+ void Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction* instr);
+ void Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction* instr);
+ void Disassemble_ZdT_ZnT_ZmT(const Instruction* instr);
+ void Disassemble_ZdT_ZnT_ZmTb(const Instruction* instr);
+ void Disassemble_ZdT_ZnTb(const Instruction* instr);
+ void Disassemble_ZdT_ZnTb_ZmTb(const Instruction* instr);
+ void Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction* instr);
+ void Disassemble_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr);
+ void Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
+ void Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction* instr);
+ void Disassemble_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
+ void Disassemble_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr);
+ void Disassemble_ZdaS_ZnH_ZmH(const Instruction* instr);
+ void Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
+ void Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction* instr);
+ void Disassemble_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
+ void Disassemble_ZdaT_PgM_ZnTb(const Instruction* instr);
+ void Disassemble_ZdaT_ZnT_ZmT(const Instruction* instr);
+ void Disassemble_ZdaT_ZnT_ZmT_const(const Instruction* instr);
+ void Disassemble_ZdaT_ZnT_const(const Instruction* instr);
+ void Disassemble_ZdaT_ZnTb_ZmTb(const Instruction* instr);
+ void Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction* instr);
+ void Disassemble_ZdnB_ZdnB(const Instruction* instr);
+ void Disassemble_ZdnB_ZdnB_ZmB(const Instruction* instr);
+ void Disassemble_ZdnS_ZdnS_ZmS(const Instruction* instr);
+ void Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
+ void Disassemble_ZdnT_PgM_ZdnT_const(const Instruction* instr);
+ void Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
+ void Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
+ void Disassemble_ZtD_Pg_ZnD_Xm(const Instruction* instr);
+ void Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
+ void Disassemble_ZtS_Pg_ZnS_Xm(const Instruction* instr);
+ void Disassemble_ZdaS_ZnB_ZmB(const Instruction* instr);
+ void Disassemble_Vd4S_Vn16B_Vm16B(const Instruction* instr);
+
+ void DisassembleSVEShiftLeftImm(const Instruction* instr);
+ void DisassembleSVEShiftRightImm(const Instruction* instr);
+ void DisassembleSVEAddSubCarry(const Instruction* instr);
+ void DisassembleSVEAddSubHigh(const Instruction* instr);
+ void DisassembleSVEComplexIntAddition(const Instruction* instr);
+ void DisassembleSVEBitwiseTernary(const Instruction* instr);
+ void DisassembleSVEFlogb(const Instruction* instr);
+ void DisassembleSVEFPPair(const Instruction* instr);
+
+ void DisassembleNoArgs(const Instruction* instr);
+
+ void DisassembleNEONMulByElementLong(const Instruction* instr);
+ void DisassembleNEONDotProdByElement(const Instruction* instr);
+ void DisassembleNEONFPMulByElement(const Instruction* instr);
+ void DisassembleNEONHalfFPMulByElement(const Instruction* instr);
+ void DisassembleNEONFPMulByElementLong(const Instruction* instr);
+ void DisassembleNEONComplexMulByElement(const Instruction* instr);
+ void DisassembleNEON2RegLogical(const Instruction* instr);
+ void DisassembleNEON2RegExtract(const Instruction* instr);
+ void DisassembleNEON2RegAddlp(const Instruction* instr);
+ void DisassembleNEON2RegCompare(const Instruction* instr);
+ void DisassembleNEON2RegFPCompare(const Instruction* instr);
+ void DisassembleNEON2RegFPConvert(const Instruction* instr);
+ void DisassembleNEON2RegFP(const Instruction* instr);
+ void DisassembleNEON3SameLogical(const Instruction* instr);
+ void DisassembleNEON3SameFHM(const Instruction* instr);
+ void DisassembleNEON3SameNoD(const Instruction* instr);
+ void DisassembleNEONShiftLeftLongImm(const Instruction* instr);
+ void DisassembleNEONShiftRightImm(const Instruction* instr);
+ void DisassembleNEONShiftRightNarrowImm(const Instruction* instr);
+ void DisassembleNEONScalarSatMulLongIndex(const Instruction* instr);
+ void DisassembleNEONFPScalarMulIndex(const Instruction* instr);
+ void DisassembleNEONFPScalar3Same(const Instruction* instr);
+ void DisassembleNEONScalar3SameOnlyD(const Instruction* instr);
+ void DisassembleNEONFPAcrossLanes(const Instruction* instr);
+ void DisassembleNEONFP16AcrossLanes(const Instruction* instr);
+ void DisassembleNEONScalarShiftImmOnlyD(const Instruction* instr);
+ void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr);
+ void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
+ void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
+
void Format(const Instruction* instr,
const char* mnemonic,
const char* format0,
const char* format1 = NULL);
+ void FormatWithDecodedMnemonic(const Instruction* instr,
+ const char* format0,
+ const char* format1 = NULL);
+
void Substitute(const Instruction* instr, const char* string);
int SubstituteField(const Instruction* instr, const char* format);
int SubstituteRegisterField(const Instruction* instr, const char* format);
diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc
index b3e28384..4718e2d2 100644
--- a/src/aarch64/instructions-aarch64.cc
+++ b/src/aarch64/instructions-aarch64.cc
@@ -44,7 +44,13 @@ static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
return result;
}
-bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const {
+bool Instruction::CanTakeSVEMovprfx(const char* form,
+ const Instruction* movprfx) const {
+ return CanTakeSVEMovprfx(Hash(form), movprfx);
+}
+
+bool Instruction::CanTakeSVEMovprfx(uint32_t form_hash,
+ const Instruction* movprfx) const {
bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z;
bool movprfx_is_unpredicated =
movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z;
@@ -58,90 +64,201 @@ bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const {
bool pg_matches_low8 = movprfx_pg == GetPgLow8();
bool vform_matches = movprfx_vform == GetSVEVectorFormat();
bool zd_matches = movprfx_zd == GetRd();
- bool zd_matches_zm = movprfx_zd == GetRm();
- bool zd_matches_zn = movprfx_zd == GetRn();
-
- switch (Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask)) {
- case AND_z_zi:
- case EOR_z_zi:
- case ORR_z_zi:
+ bool zd_isnt_zn = movprfx_zd != GetRn();
+ bool zd_isnt_zm = movprfx_zd != GetRm();
+
+ switch (form_hash) {
+ case "cdot_z_zzzi_s"_h:
+ case "sdot_z_zzzi_s"_h:
+ case "sudot_z_zzzi_s"_h:
+ case "udot_z_zzzi_s"_h:
+ case "usdot_z_zzzi_s"_h:
+ return (GetRd() != static_cast<int>(ExtractBits(18, 16))) &&
+ movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
+
+ case "cdot_z_zzzi_d"_h:
+ case "sdot_z_zzzi_d"_h:
+ case "udot_z_zzzi_d"_h:
+ return (GetRd() != static_cast<int>(ExtractBits(19, 16))) &&
+ movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
+
+ case "fmlalb_z_zzzi_s"_h:
+ case "fmlalt_z_zzzi_s"_h:
+ case "fmlslb_z_zzzi_s"_h:
+ case "fmlslt_z_zzzi_s"_h:
+ case "smlalb_z_zzzi_d"_h:
+ case "smlalb_z_zzzi_s"_h:
+ case "smlalt_z_zzzi_d"_h:
+ case "smlalt_z_zzzi_s"_h:
+ case "smlslb_z_zzzi_d"_h:
+ case "smlslb_z_zzzi_s"_h:
+ case "smlslt_z_zzzi_d"_h:
+ case "smlslt_z_zzzi_s"_h:
+ case "sqdmlalb_z_zzzi_d"_h:
+ case "sqdmlalb_z_zzzi_s"_h:
+ case "sqdmlalt_z_zzzi_d"_h:
+ case "sqdmlalt_z_zzzi_s"_h:
+ case "sqdmlslb_z_zzzi_d"_h:
+ case "sqdmlslb_z_zzzi_s"_h:
+ case "sqdmlslt_z_zzzi_d"_h:
+ case "sqdmlslt_z_zzzi_s"_h:
+ case "umlalb_z_zzzi_d"_h:
+ case "umlalb_z_zzzi_s"_h:
+ case "umlalt_z_zzzi_d"_h:
+ case "umlalt_z_zzzi_s"_h:
+ case "umlslb_z_zzzi_d"_h:
+ case "umlslb_z_zzzi_s"_h:
+ case "umlslt_z_zzzi_d"_h:
+ case "umlslt_z_zzzi_s"_h:
+ return (GetRd() != GetSVEMulLongZmAndIndex().first) &&
+ movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
+
+ case "cmla_z_zzzi_h"_h:
+ case "cmla_z_zzzi_s"_h:
+ case "fcmla_z_zzzi_h"_h:
+ case "fcmla_z_zzzi_s"_h:
+ case "fmla_z_zzzi_d"_h:
+ case "fmla_z_zzzi_h"_h:
+ case "fmla_z_zzzi_s"_h:
+ case "fmls_z_zzzi_d"_h:
+ case "fmls_z_zzzi_h"_h:
+ case "fmls_z_zzzi_s"_h:
+ case "mla_z_zzzi_d"_h:
+ case "mla_z_zzzi_h"_h:
+ case "mla_z_zzzi_s"_h:
+ case "mls_z_zzzi_d"_h:
+ case "mls_z_zzzi_h"_h:
+ case "mls_z_zzzi_s"_h:
+ case "sqrdcmlah_z_zzzi_h"_h:
+ case "sqrdcmlah_z_zzzi_s"_h:
+ case "sqrdmlah_z_zzzi_d"_h:
+ case "sqrdmlah_z_zzzi_h"_h:
+ case "sqrdmlah_z_zzzi_s"_h:
+ case "sqrdmlsh_z_zzzi_d"_h:
+ case "sqrdmlsh_z_zzzi_h"_h:
+ case "sqrdmlsh_z_zzzi_s"_h:
+ return (GetRd() != GetSVEMulZmAndIndex().first) &&
+ movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
+
+ case "adclb_z_zzz"_h:
+ case "adclt_z_zzz"_h:
+ case "bcax_z_zzz"_h:
+ case "bsl1n_z_zzz"_h:
+ case "bsl2n_z_zzz"_h:
+ case "bsl_z_zzz"_h:
+ case "cdot_z_zzz"_h:
+ case "cmla_z_zzz"_h:
+ case "eor3_z_zzz"_h:
+ case "eorbt_z_zz"_h:
+ case "eortb_z_zz"_h:
+ case "fmlalb_z_zzz"_h:
+ case "fmlalt_z_zzz"_h:
+ case "fmlslb_z_zzz"_h:
+ case "fmlslt_z_zzz"_h:
+ case "nbsl_z_zzz"_h:
+ case "saba_z_zzz"_h:
+ case "sabalb_z_zzz"_h:
+ case "sabalt_z_zzz"_h:
+ case "sbclb_z_zzz"_h:
+ case "sbclt_z_zzz"_h:
+ case "sdot_z_zzz"_h:
+ case "smlalb_z_zzz"_h:
+ case "smlalt_z_zzz"_h:
+ case "smlslb_z_zzz"_h:
+ case "smlslt_z_zzz"_h:
+ case "sqdmlalb_z_zzz"_h:
+ case "sqdmlalbt_z_zzz"_h:
+ case "sqdmlalt_z_zzz"_h:
+ case "sqdmlslb_z_zzz"_h:
+ case "sqdmlslbt_z_zzz"_h:
+ case "sqdmlslt_z_zzz"_h:
+ case "sqrdcmlah_z_zzz"_h:
+ case "sqrdmlah_z_zzz"_h:
+ case "sqrdmlsh_z_zzz"_h:
+ case "uaba_z_zzz"_h:
+ case "uabalb_z_zzz"_h:
+ case "uabalt_z_zzz"_h:
+ case "udot_z_zzz"_h:
+ case "umlalb_z_zzz"_h:
+ case "umlalt_z_zzz"_h:
+ case "umlslb_z_zzz"_h:
+ case "umlslt_z_zzz"_h:
+ case "usdot_z_zzz_s"_h:
+ case "fmmla_z_zzz_s"_h:
+ case "fmmla_z_zzz_d"_h:
+ case "smmla_z_zzz"_h:
+ case "ummla_z_zzz"_h:
+ case "usmmla_z_zzz"_h:
+ return movprfx_is_unpredicated && zd_isnt_zm && zd_isnt_zn && zd_matches;
+
+ case "addp_z_p_zz"_h:
+ case "cadd_z_zz"_h:
+ case "clasta_z_p_zz"_h:
+ case "clastb_z_p_zz"_h:
+ case "decd_z_zs"_h:
+ case "dech_z_zs"_h:
+ case "decw_z_zs"_h:
+ case "faddp_z_p_zz"_h:
+ case "fmaxnmp_z_p_zz"_h:
+ case "fmaxp_z_p_zz"_h:
+ case "fminnmp_z_p_zz"_h:
+ case "fminp_z_p_zz"_h:
+ case "ftmad_z_zzi"_h:
+ case "incd_z_zs"_h:
+ case "inch_z_zs"_h:
+ case "incw_z_zs"_h:
+ case "insr_z_v"_h:
+ case "smaxp_z_p_zz"_h:
+ case "sminp_z_p_zz"_h:
+ case "splice_z_p_zz_con"_h:
+ case "splice_z_p_zz_des"_h:
+ case "sqcadd_z_zz"_h:
+ case "sqdecd_z_zs"_h:
+ case "sqdech_z_zs"_h:
+ case "sqdecw_z_zs"_h:
+ case "sqincd_z_zs"_h:
+ case "sqinch_z_zs"_h:
+ case "sqincw_z_zs"_h:
+ case "srsra_z_zi"_h:
+ case "ssra_z_zi"_h:
+ case "umaxp_z_p_zz"_h:
+ case "uminp_z_p_zz"_h:
+ case "uqdecd_z_zs"_h:
+ case "uqdech_z_zs"_h:
+ case "uqdecw_z_zs"_h:
+ case "uqincd_z_zs"_h:
+ case "uqinch_z_zs"_h:
+ case "uqincw_z_zs"_h:
+ case "ursra_z_zi"_h:
+ case "usra_z_zi"_h:
+ case "xar_z_zzi"_h:
+ return movprfx_is_unpredicated && zd_isnt_zn && zd_matches;
+
+ case "add_z_zi"_h:
+ case "and_z_zi"_h:
+ case "decp_z_p_z"_h:
+ case "eor_z_zi"_h:
+ case "incp_z_p_z"_h:
+ case "insr_z_r"_h:
+ case "mul_z_zi"_h:
+ case "orr_z_zi"_h:
+ case "smax_z_zi"_h:
+ case "smin_z_zi"_h:
+ case "sqadd_z_zi"_h:
+ case "sqdecp_z_p_z"_h:
+ case "sqincp_z_p_z"_h:
+ case "sqsub_z_zi"_h:
+ case "sub_z_zi"_h:
+ case "subr_z_zi"_h:
+ case "umax_z_zi"_h:
+ case "umin_z_zi"_h:
+ case "uqadd_z_zi"_h:
+ case "uqdecp_z_p_z"_h:
+ case "uqincp_z_p_z"_h:
+ case "uqsub_z_zi"_h:
return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEBitwiseLogical_PredicatedMask)) {
- case AND_z_p_zz:
- case BIC_z_p_zz:
- case EOR_z_p_zz:
- case ORR_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
- case ASRD_z_p_zi:
- case ASR_z_p_zi:
- case LSL_z_p_zi:
- case LSR_z_p_zi:
- if (movprfx_is_predicated) {
- if (!pg_matches_low8) return false;
- unsigned tsz = ExtractBits<0x00c00300>();
- VectorFormat instr_vform =
- SVEFormatFromLaneSizeInBytesLog2(HighestSetBitPosition(tsz));
- if (movprfx_vform != instr_vform) return false;
- }
- return zd_matches;
- }
- switch (Mask(SVEBitwiseShiftByVector_PredicatedMask)) {
- case ASRR_z_p_zz:
- case ASR_z_p_zz:
- case LSLR_z_p_zz:
- case LSL_z_p_zz:
- case LSRR_z_p_zz:
- case LSR_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
- case ASR_z_p_zw:
- case LSL_z_p_zw:
- case LSR_z_p_zw:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEConditionallyBroadcastElementToVectorMask)) {
- case CLASTA_z_p_zz:
- case CLASTB_z_p_zz:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVECopyFPImm_PredicatedMask)) {
- case FCPY_z_p_i:
- if (movprfx_is_predicated) {
- if (!vform_matches) return false;
- if (movprfx_pg != GetRx<19, 16>()) return false;
- }
- return zd_matches;
- }
- switch (Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
- case CPY_z_p_r:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches;
- }
- switch (Mask(SVECopyIntImm_PredicatedMask)) {
- case CPY_z_p_i:
+
+ case "cpy_z_p_i"_h:
if (movprfx_is_predicated) {
if (!vform_matches) return false;
if (movprfx_pg != GetRx<19, 16>()) return false;
@@ -149,397 +266,219 @@ bool Instruction::CanTakeSVEMovprfx(const Instruction* movprfx) const {
// Only the merging form can take movprfx.
if (ExtractBit(14) == 0) return false;
return zd_matches;
+
+ case "fcpy_z_p_i"_h:
+ return (movprfx_is_unpredicated ||
+ ((movprfx_pg == GetRx<19, 16>()) && vform_matches)) &&
+ zd_matches;
+
+ case "flogb_z_p_z"_h:
+ return (movprfx_is_unpredicated ||
+ ((movprfx_vform == GetSVEVectorFormat(17)) && pg_matches_low8)) &&
+ zd_isnt_zn && zd_matches;
+
+ case "asr_z_p_zi"_h:
+ case "asrd_z_p_zi"_h:
+ case "lsl_z_p_zi"_h:
+ case "lsr_z_p_zi"_h:
+ case "sqshl_z_p_zi"_h:
+ case "sqshlu_z_p_zi"_h:
+ case "srshr_z_p_zi"_h:
+ case "uqshl_z_p_zi"_h:
+ case "urshr_z_p_zi"_h:
+ return (movprfx_is_unpredicated ||
+ ((movprfx_vform ==
+ SVEFormatFromLaneSizeInBytesLog2(
+ GetSVEImmShiftAndLaneSizeLog2(true).second)) &&
+ pg_matches_low8)) &&
+ zd_matches;
+
+ case "fcvt_z_p_z_d2h"_h:
+ case "fcvt_z_p_z_d2s"_h:
+ case "fcvt_z_p_z_h2d"_h:
+ case "fcvt_z_p_z_s2d"_h:
+ case "fcvtx_z_p_z_d2s"_h:
+ case "fcvtzs_z_p_z_d2w"_h:
+ case "fcvtzs_z_p_z_d2x"_h:
+ case "fcvtzs_z_p_z_fp162x"_h:
+ case "fcvtzs_z_p_z_s2x"_h:
+ case "fcvtzu_z_p_z_d2w"_h:
+ case "fcvtzu_z_p_z_d2x"_h:
+ case "fcvtzu_z_p_z_fp162x"_h:
+ case "fcvtzu_z_p_z_s2x"_h:
+ case "scvtf_z_p_z_w2d"_h:
+ case "scvtf_z_p_z_x2d"_h:
+ case "scvtf_z_p_z_x2fp16"_h:
+ case "scvtf_z_p_z_x2s"_h:
+ case "ucvtf_z_p_z_w2d"_h:
+ case "ucvtf_z_p_z_x2d"_h:
+ case "ucvtf_z_p_z_x2fp16"_h:
+ case "ucvtf_z_p_z_x2s"_h:
+ return (movprfx_is_unpredicated ||
+ ((movprfx_vform == kFormatVnD) && pg_matches_low8)) &&
+ zd_isnt_zn && zd_matches;
+
+ case "fcvtzs_z_p_z_fp162h"_h:
+ case "fcvtzu_z_p_z_fp162h"_h:
+ case "scvtf_z_p_z_h2fp16"_h:
+ case "ucvtf_z_p_z_h2fp16"_h:
+ return (movprfx_is_unpredicated ||
+ ((movprfx_vform == kFormatVnH) && pg_matches_low8)) &&
+ zd_isnt_zn && zd_matches;
+
+ case "fcvt_z_p_z_h2s"_h:
+ case "fcvt_z_p_z_s2h"_h:
+ case "fcvtzs_z_p_z_fp162w"_h:
+ case "fcvtzs_z_p_z_s2w"_h:
+ case "fcvtzu_z_p_z_fp162w"_h:
+ case "fcvtzu_z_p_z_s2w"_h:
+ case "scvtf_z_p_z_w2fp16"_h:
+ case "scvtf_z_p_z_w2s"_h:
+ case "ucvtf_z_p_z_w2fp16"_h:
+ case "ucvtf_z_p_z_w2s"_h:
+ return (movprfx_is_unpredicated ||
+ ((movprfx_vform == kFormatVnS) && pg_matches_low8)) &&
+ zd_isnt_zn && zd_matches;
+
+ case "fcmla_z_p_zzz"_h:
+ case "fmad_z_p_zzz"_h:
+ case "fmla_z_p_zzz"_h:
+ case "fmls_z_p_zzz"_h:
+ case "fmsb_z_p_zzz"_h:
+ case "fnmad_z_p_zzz"_h:
+ case "fnmla_z_p_zzz"_h:
+ case "fnmls_z_p_zzz"_h:
+ case "fnmsb_z_p_zzz"_h:
+ case "mad_z_p_zzz"_h:
+ case "mla_z_p_zzz"_h:
+ case "mls_z_p_zzz"_h:
+ case "msb_z_p_zzz"_h:
+ return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
+ zd_isnt_zm && zd_isnt_zn && zd_matches;
+
+ case "abs_z_p_z"_h:
+ case "add_z_p_zz"_h:
+ case "and_z_p_zz"_h:
+ case "asr_z_p_zw"_h:
+ case "asr_z_p_zz"_h:
+ case "asrr_z_p_zz"_h:
+ case "bic_z_p_zz"_h:
+ case "cls_z_p_z"_h:
+ case "clz_z_p_z"_h:
+ case "cnot_z_p_z"_h:
+ case "cnt_z_p_z"_h:
+ case "cpy_z_p_v"_h:
+ case "eor_z_p_zz"_h:
+ case "fabd_z_p_zz"_h:
+ case "fabs_z_p_z"_h:
+ case "fadd_z_p_zz"_h:
+ case "fcadd_z_p_zz"_h:
+ case "fdiv_z_p_zz"_h:
+ case "fdivr_z_p_zz"_h:
+ case "fmax_z_p_zz"_h:
+ case "fmaxnm_z_p_zz"_h:
+ case "fmin_z_p_zz"_h:
+ case "fminnm_z_p_zz"_h:
+ case "fmul_z_p_zz"_h:
+ case "fmulx_z_p_zz"_h:
+ case "fneg_z_p_z"_h:
+ case "frecpx_z_p_z"_h:
+ case "frinta_z_p_z"_h:
+ case "frinti_z_p_z"_h:
+ case "frintm_z_p_z"_h:
+ case "frintn_z_p_z"_h:
+ case "frintp_z_p_z"_h:
+ case "frintx_z_p_z"_h:
+ case "frintz_z_p_z"_h:
+ case "fscale_z_p_zz"_h:
+ case "fsqrt_z_p_z"_h:
+ case "fsub_z_p_zz"_h:
+ case "fsubr_z_p_zz"_h:
+ case "lsl_z_p_zw"_h:
+ case "lsl_z_p_zz"_h:
+ case "lslr_z_p_zz"_h:
+ case "lsr_z_p_zw"_h:
+ case "lsr_z_p_zz"_h:
+ case "lsrr_z_p_zz"_h:
+ case "mul_z_p_zz"_h:
+ case "neg_z_p_z"_h:
+ case "not_z_p_z"_h:
+ case "orr_z_p_zz"_h:
+ case "rbit_z_p_z"_h:
+ case "revb_z_z"_h:
+ case "revh_z_z"_h:
+ case "revw_z_z"_h:
+ case "sabd_z_p_zz"_h:
+ case "sadalp_z_p_z"_h:
+ case "sdiv_z_p_zz"_h:
+ case "sdivr_z_p_zz"_h:
+ case "shadd_z_p_zz"_h:
+ case "shsub_z_p_zz"_h:
+ case "shsubr_z_p_zz"_h:
+ case "smax_z_p_zz"_h:
+ case "smin_z_p_zz"_h:
+ case "smulh_z_p_zz"_h:
+ case "sqabs_z_p_z"_h:
+ case "sqadd_z_p_zz"_h:
+ case "sqneg_z_p_z"_h:
+ case "sqrshl_z_p_zz"_h:
+ case "sqrshlr_z_p_zz"_h:
+ case "sqshl_z_p_zz"_h:
+ case "sqshlr_z_p_zz"_h:
+ case "sqsub_z_p_zz"_h:
+ case "sqsubr_z_p_zz"_h:
+ case "srhadd_z_p_zz"_h:
+ case "srshl_z_p_zz"_h:
+ case "srshlr_z_p_zz"_h:
+ case "sub_z_p_zz"_h:
+ case "subr_z_p_zz"_h:
+ case "suqadd_z_p_zz"_h:
+ case "sxtb_z_p_z"_h:
+ case "sxth_z_p_z"_h:
+ case "sxtw_z_p_z"_h:
+ case "uabd_z_p_zz"_h:
+ case "uadalp_z_p_z"_h:
+ case "udiv_z_p_zz"_h:
+ case "udivr_z_p_zz"_h:
+ case "uhadd_z_p_zz"_h:
+ case "uhsub_z_p_zz"_h:
+ case "uhsubr_z_p_zz"_h:
+ case "umax_z_p_zz"_h:
+ case "umin_z_p_zz"_h:
+ case "umulh_z_p_zz"_h:
+ case "uqadd_z_p_zz"_h:
+ case "uqrshl_z_p_zz"_h:
+ case "uqrshlr_z_p_zz"_h:
+ case "uqshl_z_p_zz"_h:
+ case "uqshlr_z_p_zz"_h:
+ case "uqsub_z_p_zz"_h:
+ case "uqsubr_z_p_zz"_h:
+ case "urecpe_z_p_z"_h:
+ case "urhadd_z_p_zz"_h:
+ case "urshl_z_p_zz"_h:
+ case "urshlr_z_p_zz"_h:
+ case "ursqrte_z_p_z"_h:
+ case "usqadd_z_p_zz"_h:
+ case "uxtb_z_p_z"_h:
+ case "uxth_z_p_z"_h:
+ case "uxtw_z_p_z"_h:
+ return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
+ zd_isnt_zn && zd_matches;
+
+ case "cpy_z_p_r"_h:
+ case "fadd_z_p_zs"_h:
+ case "fmax_z_p_zs"_h:
+ case "fmaxnm_z_p_zs"_h:
+ case "fmin_z_p_zs"_h:
+ case "fminnm_z_p_zs"_h:
+ case "fmul_z_p_zs"_h:
+ case "fsub_z_p_zs"_h:
+ case "fsubr_z_p_zs"_h:
+ return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) &&
+ zd_matches;
+ default:
+ return false;
}
- switch (Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
- case CPY_z_p_v:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
- case FADD_z_p_zs:
- case FMAXNM_z_p_zs:
- case FMAX_z_p_zs:
- case FMINNM_z_p_zs:
- case FMIN_z_p_zs:
- case FMUL_z_p_zs:
- case FSUBR_z_p_zs:
- case FSUB_z_p_zs:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches;
- }
- switch (Mask(SVEFPArithmetic_PredicatedMask)) {
- case FABD_z_p_zz:
- case FADD_z_p_zz:
- case FDIVR_z_p_zz:
- case FDIV_z_p_zz:
- case FMAXNM_z_p_zz:
- case FMAX_z_p_zz:
- case FMINNM_z_p_zz:
- case FMIN_z_p_zz:
- case FMULX_z_p_zz:
- case FMUL_z_p_zz:
- case FSCALE_z_p_zz:
- case FSUBR_z_p_zz:
- case FSUB_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEFPComplexAdditionMask)) {
- case FCADD_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEFPComplexMulAddIndexMask)) {
- case FCMLA_z_zzzi_h:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<18, 16>()) return false;
- return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
- case FCMLA_z_zzzi_s:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<19, 16>()) return false;
- return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEFPComplexMulAddMask)) {
- case FCMLA_z_p_zzz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zm && !zd_matches_zn;
- }
- switch (Mask(SVEFPConvertPrecisionMask)) {
- case FCVT_z_p_z_d2h:
- case FCVT_z_p_z_d2s:
- case FCVT_z_p_z_h2d:
- case FCVT_z_p_z_h2s:
- case FCVT_z_p_z_s2d:
- case FCVT_z_p_z_s2h:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEFPConvertToIntMask)) {
- case FCVTZS_z_p_z_d2w:
- case FCVTZS_z_p_z_d2x:
- case FCVTZS_z_p_z_fp162h:
- case FCVTZS_z_p_z_fp162w:
- case FCVTZS_z_p_z_fp162x:
- case FCVTZS_z_p_z_s2w:
- case FCVTZS_z_p_z_s2x:
- case FCVTZU_z_p_z_d2w:
- case FCVTZU_z_p_z_d2x:
- case FCVTZU_z_p_z_fp162h:
- case FCVTZU_z_p_z_fp162w:
- case FCVTZU_z_p_z_fp162x:
- case FCVTZU_z_p_z_s2w:
- case FCVTZU_z_p_z_s2x:
- if (movprfx_is_predicated) {
- if (!pg_matches_low8) return false;
- // The movprfx element size must match the instruction's maximum encoded
- // element size. We have to partially decode the opc and opc2 fields to
- // find this.
- unsigned opc = ExtractBits(23, 22);
- unsigned opc2 = ExtractBits(18, 17);
- VectorFormat instr_vform =
- SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2));
- if (movprfx_vform != instr_vform) return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEFPMulAddIndexMask)) {
- case FMLA_z_zzzi_h:
- case FMLA_z_zzzi_h_i3h:
- case FMLA_z_zzzi_s:
- case FMLS_z_zzzi_h:
- case FMLS_z_zzzi_h_i3h:
- case FMLS_z_zzzi_s:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<18, 16>()) return false;
- return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
- case FMLA_z_zzzi_d:
- case FMLS_z_zzzi_d:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<19, 16>()) return false;
- return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEFPMulAddMask)) {
- case FMAD_z_p_zzz:
- case FMSB_z_p_zzz:
- case FNMAD_z_p_zzz:
- case FNMSB_z_p_zzz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<20, 16>()) return false;
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- case FMLA_z_p_zzz:
- case FMLS_z_p_zzz:
- case FNMLA_z_p_zzz:
- case FNMLS_z_p_zzz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zm && !zd_matches_zn;
- }
- switch (Mask(SVEFPRoundToIntegralValueMask)) {
- case FRINTA_z_p_z:
- case FRINTI_z_p_z:
- case FRINTM_z_p_z:
- case FRINTN_z_p_z:
- case FRINTP_z_p_z:
- case FRINTX_z_p_z:
- case FRINTZ_z_p_z:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEFPTrigMulAddCoefficientMask)) {
- case FTMAD_z_zzi:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEFPUnaryOpMask)) {
- case FRECPX_z_p_z:
- case FSQRT_z_p_z:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEIncDecByPredicateCountMask)) {
- case DECP_z_p_z:
- case INCP_z_p_z:
- case SQDECP_z_p_z:
- case SQINCP_z_p_z:
- case UQDECP_z_p_z:
- case UQINCP_z_p_z:
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEIncDecVectorByElementCountMask)) {
- case DECD_z_zs:
- case DECH_z_zs:
- case DECW_z_zs:
- case INCD_z_zs:
- case INCH_z_zs:
- case INCW_z_zs:
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEInsertGeneralRegisterMask)) {
- case INSR_z_r:
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEInsertSIMDFPScalarRegisterMask)) {
- case INSR_z_v:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
- case ADD_z_zi:
- case SQADD_z_zi:
- case SQSUB_z_zi:
- case SUBR_z_zi:
- case SUB_z_zi:
- case UQADD_z_zi:
- case UQSUB_z_zi:
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
- case ADD_z_p_zz:
- case SUBR_z_p_zz:
- case SUB_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEIntConvertToFPMask)) {
- case SCVTF_z_p_z_h2fp16:
- case SCVTF_z_p_z_w2d:
- case SCVTF_z_p_z_w2fp16:
- case SCVTF_z_p_z_w2s:
- case SCVTF_z_p_z_x2d:
- case SCVTF_z_p_z_x2fp16:
- case SCVTF_z_p_z_x2s:
- case UCVTF_z_p_z_h2fp16:
- case UCVTF_z_p_z_w2d:
- case UCVTF_z_p_z_w2fp16:
- case UCVTF_z_p_z_w2s:
- case UCVTF_z_p_z_x2d:
- case UCVTF_z_p_z_x2fp16:
- case UCVTF_z_p_z_x2s:
- if (movprfx_is_predicated) {
- if (!pg_matches_low8) return false;
- // The movprfx element size must match the instruction's maximum encoded
- // element size. We have to partially decode the opc and opc2 fields to
- // find this.
- unsigned opc = ExtractBits(23, 22);
- unsigned opc2 = ExtractBits(18, 17);
- VectorFormat instr_vform =
- SVEFormatFromLaneSizeInBytesLog2(std::max(opc, opc2));
- if (movprfx_vform != instr_vform) return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEIntDivideVectors_PredicatedMask)) {
- case SDIVR_z_p_zz:
- case SDIV_z_p_zz:
- case UDIVR_z_p_zz:
- case UDIV_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEIntMinMaxDifference_PredicatedMask)) {
- case SABD_z_p_zz:
- case SMAX_z_p_zz:
- case SMIN_z_p_zz:
- case UABD_z_p_zz:
- case UMAX_z_p_zz:
- case UMIN_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
- case SMAX_z_zi:
- case SMIN_z_zi:
- case UMAX_z_zi:
- case UMIN_z_zi:
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEIntMulAddPredicatedMask)) {
- case MAD_z_p_zzz:
- case MSB_z_p_zzz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches && !zd_matches_zm;
- case MLA_z_p_zzz:
- case MLS_z_p_zzz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zm && !zd_matches_zn;
- }
- switch (Mask(SVEIntMulAddUnpredicatedMask)) {
- case SDOT_z_zzz:
- case UDOT_z_zzz:
- return movprfx_is_unpredicated && zd_matches && !zd_matches_zm &&
- !zd_matches_zn;
- }
- switch (Mask(SVEIntMulImm_UnpredicatedMask)) {
- case MUL_z_zi:
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEIntMulVectors_PredicatedMask)) {
- case MUL_z_p_zz:
- case SMULH_z_p_zz:
- case UMULH_z_p_zz:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return zd_matches;
- }
- switch (Mask(SVEIntUnaryArithmeticPredicatedMask)) {
- case ABS_z_p_z:
- case CLS_z_p_z:
- case CLZ_z_p_z:
- case CNOT_z_p_z:
- case CNT_z_p_z:
- case FABS_z_p_z:
- case FNEG_z_p_z:
- case NEG_z_p_z:
- case NOT_z_p_z:
- case SXTB_z_p_z:
- case SXTH_z_p_z:
- case SXTW_z_p_z:
- case UXTB_z_p_z:
- case UXTH_z_p_z:
- case UXTW_z_p_z:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEMulIndexMask)) {
- case SDOT_z_zzzi_s:
- case UDOT_z_zzzi_s:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<18, 16>()) return false;
- return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
- case SDOT_z_zzzi_d:
- case UDOT_z_zzzi_d:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<19, 16>()) return false;
- return movprfx_is_unpredicated && zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVEPermuteVectorExtractMask)) {
- case EXT_z_zi_des:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEReverseWithinElementsMask)) {
- case RBIT_z_p_z:
- case REVB_z_z:
- case REVH_z_z:
- case REVW_z_z:
- if (movprfx_is_predicated && !(pg_matches_low8 && vform_matches)) {
- return false;
- }
- return zd_matches && !zd_matches_zn;
- }
- switch (Mask(SVESaturatingIncDecVectorByElementCountMask)) {
- case SQDECD_z_zs:
- case SQDECH_z_zs:
- case SQDECW_z_zs:
- case SQINCD_z_zs:
- case SQINCH_z_zs:
- case SQINCW_z_zs:
- case UQDECD_z_zs:
- case UQDECH_z_zs:
- case UQDECW_z_zs:
- case UQINCD_z_zs:
- case UQINCH_z_zs:
- case UQINCW_z_zs:
- return movprfx_is_unpredicated && zd_matches;
- }
- switch (Mask(SVEVectorSplice_DestructiveMask)) {
- case SPLICE_z_p_zz_des:
- // The movprfx's `zd` must not alias any other inputs.
- if (movprfx_zd == GetRx<9, 5>()) return false;
- return movprfx_is_unpredicated && zd_matches;
- }
- return false;
} // NOLINT(readability/fn_size)
bool Instruction::IsLoad() const {
@@ -610,6 +549,58 @@ std::pair<int, int> Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const {
return std::make_pair(index, lane_size_in_byte_log_2);
}
+// Get the register and index for SVE indexed multiplies encoded in the forms:
+// .h : Zm = <18:16>, index = <22><20:19>
+// .s : Zm = <18:16>, index = <20:19>
+// .d : Zm = <19:16>, index = <20>
+std::pair<int, int> Instruction::GetSVEMulZmAndIndex() const {
+ int reg_code = GetRmLow16();
+ int index = ExtractBits(20, 19);
+
+ // For .h, index uses bit zero of the size field, so kFormatVnB below implies
+ // half-word lane, with most-significant bit of the index zero.
+ switch (GetSVEVectorFormat()) {
+ case kFormatVnD:
+ index >>= 1; // Only bit 20 in the index for D lanes.
+ break;
+ case kFormatVnH:
+ index += 4; // Bit 22 is the top bit of index.
+ VIXL_FALLTHROUGH();
+ case kFormatVnB:
+ case kFormatVnS:
+ reg_code &= 7; // Three bits used for the register.
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ return std::make_pair(reg_code, index);
+}
+
+// Get the register and index for SVE indexed long multiplies encoded in the
+// forms:
+// .h : Zm = <18:16>, index = <20:19><11>
+// .s : Zm = <19:16>, index = <20><11>
+std::pair<int, int> Instruction::GetSVEMulLongZmAndIndex() const {
+ int reg_code = GetRmLow16();
+ int index = ExtractBit(11);
+
+ // For long multiplies, the SVE size field <23:22> encodes the destination
+ // element size. The source element size is half the width.
+ switch (GetSVEVectorFormat()) {
+ case kFormatVnS:
+ reg_code &= 7;
+ index |= ExtractBits(20, 19) << 1;
+ break;
+ case kFormatVnD:
+ index |= ExtractBit(20) << 1;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ return std::make_pair(reg_code, index);
+}
// Logical immediates can't encode zero, so a return value of zero is used to
// indicate a failure case. Specifically, where the constraints on imm_s are
@@ -717,6 +708,12 @@ int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const {
}
}
+int Instruction::GetSVEExtractImmediate() const {
+ const int imm8h_mask = 0x001F0000;
+ const int imm8l_mask = 0x00001C00;
+ return ExtractBits<imm8h_mask | imm8l_mask>();
+}
+
uint64_t Instruction::DecodeImmBitMask(int32_t n,
int32_t imm_s,
int32_t imm_r,
@@ -1025,7 +1022,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
return kFormatVnH;
case kFormatVnD:
return kFormatVnS;
- break;
default:
VIXL_UNREACHABLE();
return kFormatUndefined;
@@ -1034,8 +1030,6 @@ VectorFormat VectorFormatHalfWidth(VectorFormat vform) {
VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
- VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S ||
- vform == kFormatB || vform == kFormatH || vform == kFormatS);
switch (vform) {
case kFormat8B:
return kFormat8H;
@@ -1049,6 +1043,12 @@ VectorFormat VectorFormatDoubleWidth(VectorFormat vform) {
return kFormatS;
case kFormatS:
return kFormatD;
+ case kFormatVnB:
+ return kFormatVnH;
+ case kFormatVnH:
+ return kFormatVnS;
+ case kFormatVnS:
+ return kFormatVnD;
default:
VIXL_UNREACHABLE();
return kFormatUndefined;
@@ -1162,6 +1162,7 @@ bool IsSVEFormat(VectorFormat vform) {
case kFormatVnS:
case kFormatVnD:
case kFormatVnQ:
+ case kFormatVnO:
return true;
default:
return false;
@@ -1283,6 +1284,8 @@ unsigned LaneSizeInBitsFromFormat(VectorFormat vform) {
return 64;
case kFormatVnQ:
return 128;
+ case kFormatVnO:
+ return 256;
default:
VIXL_UNREACHABLE();
return 0;
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h
index 5f56ae16..d92e6ee2 100644
--- a/src/aarch64/instructions-aarch64.h
+++ b/src/aarch64/instructions-aarch64.h
@@ -199,8 +199,11 @@ enum VectorFormat {
// An artificial value, used to distinguish from NEON format category.
kFormatSVE = 0x0000fffd,
- // An artificial value. Q lane size isn't encoded in the usual size field.
- kFormatSVEQ = 0x000f0000,
+ // Artificial values. Q and O lane sizes aren't encoded in the usual size
+ // field.
+ kFormatSVEQ = 0x00080000,
+ kFormatSVEO = 0x00040000,
+
// Vector element width of SVE register with the unknown lane count since
// the vector length is implementation dependent.
kFormatVnB = SVE_B | kFormatSVE,
@@ -208,6 +211,7 @@ enum VectorFormat {
kFormatVnS = SVE_S | kFormatSVE,
kFormatVnD = SVE_D | kFormatSVE,
kFormatVnQ = kFormatSVEQ | kFormatSVE,
+ kFormatVnO = kFormatSVEO | kFormatSVE,
// An artificial value, used by simulator trace tests and a few oddball
// instructions (such as FMLAL).
@@ -267,11 +271,21 @@ class Instruction {
return Compress(M);
}
+ uint32_t ExtractBitsAbsent() const {
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+
template <uint32_t M, uint32_t V>
uint32_t IsMaskedValue() const {
return (Mask(M) == V) ? 1 : 0;
}
+ uint32_t IsMaskedValueAbsent() const {
+ VIXL_UNREACHABLE();
+ return 0;
+ }
+
int32_t ExtractSignedBits(int msb, int lsb) const {
int32_t bits = *(reinterpret_cast<const int32_t*>(this));
return ExtractSignedBitfield32(msb, lsb, bits);
@@ -300,8 +314,13 @@ class Instruction {
return this->ExtractBits(msb, lsb);
}
- VectorFormat GetSVEVectorFormat() const {
- switch (Mask(SVESizeFieldMask)) {
+ VectorFormat GetSVEVectorFormat(int field_lsb = 22) const {
+ VIXL_ASSERT((field_lsb >= 0) && (field_lsb <= 30));
+ uint32_t instr = ExtractUnsignedBitfield32(field_lsb + 1,
+ field_lsb,
+ GetInstructionBits())
+ << 22;
+ switch (instr & SVESizeFieldMask) {
case SVE_B:
return kFormatVnB;
case SVE_H:
@@ -349,12 +368,18 @@ class Instruction {
std::pair<int, int> GetSVEPermuteIndexAndLaneSizeLog2() const;
+ std::pair<int, int> GetSVEMulZmAndIndex() const;
+ std::pair<int, int> GetSVEMulLongZmAndIndex() const;
+
std::pair<int, int> GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const;
+ int GetSVEExtractImmediate() const;
+
int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const;
int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const;
+
unsigned GetImmNEONabcdefgh() const;
VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) {
return GetImmNEONabcdefgh();
@@ -453,7 +478,8 @@ class Instruction {
}
// True if `this` is valid immediately after the provided movprfx instruction.
- bool CanTakeSVEMovprfx(Instruction const* movprfx) const;
+ bool CanTakeSVEMovprfx(uint32_t form_hash, Instruction const* movprfx) const;
+ bool CanTakeSVEMovprfx(const char* form, Instruction const* movprfx) const;
bool IsLoad() const;
bool IsStore() const;
@@ -789,18 +815,26 @@ class NEONFormatDecoder {
SubstitutionMode mode0 = kFormat,
SubstitutionMode mode1 = kFormat,
SubstitutionMode mode2 = kFormat) {
+ const char* subst0 = GetSubstitute(0, mode0);
+ const char* subst1 = GetSubstitute(1, mode1);
+ const char* subst2 = GetSubstitute(2, mode2);
+
+ if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL)) {
+ return NULL;
+ }
+
snprintf(form_buffer_,
sizeof(form_buffer_),
string,
- GetSubstitute(0, mode0),
- GetSubstitute(1, mode1),
- GetSubstitute(2, mode2));
+ subst0,
+ subst1,
+ subst2);
return form_buffer_;
}
- // Append a "2" to a mnemonic string based of the state of the Q bit.
+ // Append a "2" to a mnemonic string based on the state of the Q bit.
const char* Mnemonic(const char* mnemonic) {
- if ((instrbits_ & NEON_Q) != 0) {
+ if ((mnemonic != NULL) && (instrbits_ & NEON_Q) != 0) {
snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic);
return mne_buffer_;
}
@@ -895,6 +929,33 @@ class NEONFormatDecoder {
return &map;
}
+ // The shift immediate map uses between two and five bits to encode the NEON
+ // vector format:
+ // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
+ // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
+ static const NEONFormatMap* ShiftImmFormatMap() {
+ static const NEONFormatMap map = {{22, 21, 20, 19, 30},
+ {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B,
+ NF_4H, NF_8H, NF_4H, NF_8H,
+ NF_2S, NF_4S, NF_2S, NF_4S,
+ NF_2S, NF_4S, NF_2S, NF_4S,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
+ NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}};
+ return &map;
+ }
+
+ // The shift long/narrow immediate map uses between two and four bits to
+ // encode the NEON vector format:
+ // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
+ static const NEONFormatMap* ShiftLongNarrowImmFormatMap() {
+ static const NEONFormatMap map =
+ {{22, 21, 20, 19},
+ {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
+ return &map;
+ }
+
// The scalar format map uses two bits (size<1:0>) to encode the NEON scalar
// formats: NF_B, NF_H, NF_S, NF_D.
static const NEONFormatMap* ScalarFormatMap() {
@@ -968,7 +1029,7 @@ class NEONFormatDecoder {
static const char* NEONFormatAsString(NEONFormat format) {
// clang-format off
static const char* formats[] = {
- "undefined",
+ NULL,
"8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d",
"b", "h", "s", "d"
};
@@ -983,9 +1044,9 @@ class NEONFormatDecoder {
(format == NF_D) || (format == NF_UNDEF));
// clang-format off
static const char* formats[] = {
- "undefined",
- "undefined", "undefined", "undefined", "undefined",
- "undefined", "undefined", "undefined", "undefined",
+ NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
"'B", "'H", "'S", "'D"
};
// clang-format on
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index cb82f715..a77e7f28 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -625,6 +625,9 @@ LogicVRegister Simulator::addp(VectorFormat vform,
uzp1(vform, temp1, src1, src2);
uzp2(vform, temp2, src1, src2);
add(vform, dst, temp1, temp2);
+ if (IsSVEFormat(vform)) {
+ interleave_top_bottom(vform, dst, dst);
+ }
return dst;
}
@@ -723,7 +726,7 @@ LogicVRegister Simulator::smulh(VectorFormat vform,
const LogicVRegister& src1,
const LogicVRegister& src2) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- int64_t dst_val;
+ int64_t dst_val = 0xbadbeef;
int64_t val1 = src1.Int(vform, i);
int64_t val2 = src2.Int(vform, i);
switch (LaneSizeInBitsFromFormat(vform)) {
@@ -740,7 +743,6 @@ LogicVRegister Simulator::smulh(VectorFormat vform,
dst_val = internal::MultiplyHigh<64>(val1, val2);
break;
default:
- dst_val = 0xbadbeef;
VIXL_UNREACHABLE();
break;
}
@@ -755,7 +757,7 @@ LogicVRegister Simulator::umulh(VectorFormat vform,
const LogicVRegister& src1,
const LogicVRegister& src2) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- uint64_t dst_val;
+ uint64_t dst_val = 0xbadbeef;
uint64_t val1 = src1.Uint(vform, i);
uint64_t val2 = src2.Uint(vform, i);
switch (LaneSizeInBitsFromFormat(vform)) {
@@ -772,7 +774,6 @@ LogicVRegister Simulator::umulh(VectorFormat vform,
dst_val = internal::MultiplyHigh<64>(val1, val2);
break;
default:
- dst_val = 0xbadbeef;
VIXL_UNREACHABLE();
break;
}
@@ -803,151 +804,6 @@ LogicVRegister Simulator::mls(VectorFormat vform,
return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
}
-
-LogicVRegister Simulator::smull(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::smull2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::umull(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::umull2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::smlal(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::smlal2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::umlal(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::umlal2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::smlsl(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::smlsl2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::umlsl(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
-LogicVRegister Simulator::umlsl2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
LogicVRegister Simulator::sqdmull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -959,19 +815,6 @@ LogicVRegister Simulator::sqdmull(VectorFormat vform,
return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}
-
-LogicVRegister Simulator::sqdmull2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -983,19 +826,6 @@ LogicVRegister Simulator::sqdmlal(VectorFormat vform,
return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}
-
-LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -1007,19 +837,6 @@ LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
}
-
-LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- VectorFormat indexform =
- VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
- return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
-}
-
-
LogicVRegister Simulator::sqdmulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -1042,23 +859,6 @@ LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
}
-LogicVRegister Simulator::sdot(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- // NEON indexed `dot` allows the index value exceed the register size.
- // Promote the format to Q-sized vector format before the duplication.
- dup_elements_to_segments(IsSVEFormat(vform) ? vform
- : VectorFormatFillQ(vform),
- temp,
- src2,
- index);
- return sdot(vform, dst, src1, temp);
-}
-
-
LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -1070,23 +870,6 @@ LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
}
-LogicVRegister Simulator::udot(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index) {
- SimVRegister temp;
- // NEON indexed `dot` allows the index value exceed the register size.
- // Promote the format to Q-sized vector format before the duplication.
- dup_elements_to_segments(IsSVEFormat(vform) ? vform
- : VectorFormatFillQ(vform),
- temp,
- src2,
- index);
- return udot(vform, dst, src1, temp);
-}
-
-
LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -1098,12 +881,16 @@ LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
}
-uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
- uint16_t result = 0;
- uint16_t extended_op2 = op2;
- for (int i = 0; i < 8; ++i) {
+uint64_t Simulator::PolynomialMult(uint64_t op1,
+ uint64_t op2,
+ int lane_size_in_bits) const {
+ VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
+ VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
+ VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
+ uint64_t result = 0;
+ for (int i = 0; i < lane_size_in_bits; ++i) {
if ((op1 >> i) & 1) {
- result = result ^ (extended_op2 << i);
+ result = result ^ (op2 << i);
}
}
return result;
@@ -1118,7 +905,9 @@ LogicVRegister Simulator::pmul(VectorFormat vform,
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
dst.SetUint(vform,
i,
- PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
+ PolynomialMult(src1.Uint(vform, i),
+ src2.Uint(vform, i),
+ LaneSizeInBitsFromFormat(vform)));
}
return dst;
}
@@ -1128,14 +917,17 @@ LogicVRegister Simulator::pmull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- VectorFormat vform_src = VectorFormatHalfWidth(vform);
dst.ClearForWrite(vform);
+
+ VectorFormat vform_src = VectorFormatHalfWidth(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
dst.SetUint(vform,
i,
PolynomialMult(src1.Uint(vform_src, i),
- src2.Uint(vform_src, i)));
+ src2.Uint(vform_src, i),
+ LaneSizeInBitsFromFormat(vform_src)));
}
+
return dst;
}
@@ -1151,7 +943,8 @@ LogicVRegister Simulator::pmull2(VectorFormat vform,
dst.SetUint(vform,
i,
PolynomialMult(src1.Uint(vform_src, lane_count + i),
- src2.Uint(vform_src, lane_count + i)));
+ src2.Uint(vform_src, lane_count + i),
+ LaneSizeInBitsFromFormat(vform_src)));
}
return dst;
}
@@ -1327,12 +1120,13 @@ LogicVRegister Simulator::bit(VectorFormat vform,
LogicVRegister Simulator::bsl(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& src_mask,
const LogicVRegister& src1,
const LogicVRegister& src2) {
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
uint64_t operand1 = src2.Uint(vform, i);
- uint64_t operand2 = dst.Uint(vform, i);
+ uint64_t operand2 = src_mask.Uint(vform, i);
uint64_t operand3 = src1.Uint(vform, i);
uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
dst.SetUint(vform, i, result);
@@ -1383,11 +1177,11 @@ LogicVRegister Simulator::sminmaxp(VectorFormat vform,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool max) {
- int lanes = LaneCountFromFormat(vform);
- int64_t result[kMaxLanesPerVector];
+ unsigned lanes = LaneCountFromFormat(vform);
+ int64_t result[kZRegMaxSizeInBytes];
const LogicVRegister* src = &src1;
- for (int j = 0; j < 2; j++) {
- for (int i = 0; i < lanes; i += 2) {
+ for (unsigned j = 0; j < 2; j++) {
+ for (unsigned i = 0; i < lanes; i += 2) {
int64_t first_val = src->Int(vform, i);
int64_t second_val = src->Int(vform, i + 1);
int64_t dst_val;
@@ -1396,12 +1190,15 @@ LogicVRegister Simulator::sminmaxp(VectorFormat vform,
} else {
dst_val = (first_val < second_val) ? first_val : second_val;
}
- VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
+ VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
result[(i >> 1) + (j * lanes / 2)] = dst_val;
}
src = &src2;
}
dst.SetIntArray(vform, result);
+ if (IsSVEFormat(vform)) {
+ interleave_top_bottom(vform, dst, dst);
+ }
return dst;
}
@@ -1586,11 +1383,11 @@ LogicVRegister Simulator::uminmaxp(VectorFormat vform,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool max) {
- int lanes = LaneCountFromFormat(vform);
- uint64_t result[kMaxLanesPerVector];
+ unsigned lanes = LaneCountFromFormat(vform);
+ uint64_t result[kZRegMaxSizeInBytes];
const LogicVRegister* src = &src1;
- for (int j = 0; j < 2; j++) {
- for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
+ for (unsigned j = 0; j < 2; j++) {
+ for (unsigned i = 0; i < lanes; i += 2) {
uint64_t first_val = src->Uint(vform, i);
uint64_t second_val = src->Uint(vform, i + 1);
uint64_t dst_val;
@@ -1599,12 +1396,15 @@ LogicVRegister Simulator::uminmaxp(VectorFormat vform,
} else {
dst_val = (first_val < second_val) ? first_val : second_val;
}
- VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
+ VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
result[(i >> 1) + (j * lanes / 2)] = dst_val;
}
src = &src2;
}
dst.SetUintArray(vform, result);
+ if (IsSVEFormat(vform)) {
+ interleave_top_bottom(vform, dst, dst);
+ }
return dst;
}
@@ -1811,9 +1611,8 @@ LogicVRegister Simulator::splice(VectorFormat vform,
result[i] = src2.Uint(vform, i - dst_idx);
}
- for (int i = 0; i < lane_count; i++) {
- dst.SetUint(vform, i, result[i]);
- }
+ dst.SetUintArray(vform, result);
+
return dst;
}
@@ -2060,14 +1859,33 @@ LogicVRegister Simulator::cnt(VectorFormat vform,
return dst;
}
+static int64_t CalculateSignedShiftDistance(int64_t shift_val,
+ int esize,
+ bool shift_in_ls_byte) {
+ if (shift_in_ls_byte) {
+ // Neon uses the least-significant byte of the lane as the shift distance.
+ shift_val = ExtractSignedBitfield64(7, 0, shift_val);
+ } else {
+ // SVE uses a saturated shift distance in the range
+ // -(esize + 1) ... (esize + 1).
+ if (shift_val > (esize + 1)) shift_val = esize + 1;
+ if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
+ }
+ return shift_val;
+}
LogicVRegister Simulator::sshl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool shift_in_ls_byte) {
dst.ClearForWrite(vform);
+ int esize = LaneSizeInBitsFromFormat(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- int8_t shift_val = src2.Int(vform, i);
+ int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
+ esize,
+ shift_in_ls_byte);
+
int64_t lj_src_val = src1.IntLeftJustified(vform, i);
// Set signed saturation state.
@@ -2124,10 +1942,15 @@ LogicVRegister Simulator::sshl(VectorFormat vform,
LogicVRegister Simulator::ushl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool shift_in_ls_byte) {
dst.ClearForWrite(vform);
+ int esize = LaneSizeInBitsFromFormat(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- int8_t shift_val = src2.Int(vform, i);
+ int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
+ esize,
+ shift_in_ls_byte);
+
uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
// Set saturation state.
@@ -2159,6 +1982,27 @@ LogicVRegister Simulator::ushl(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::sshr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ // Saturate to sidestep the min-int problem.
+ neg(vform, temp, src2).SignedSaturate(vform);
+ sshl(vform, dst, src1, temp, false);
+ return dst;
+}
+
+LogicVRegister Simulator::ushr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ SimVRegister temp;
+ // Saturate to sidestep the min-int problem.
+ neg(vform, temp, src2).SignedSaturate(vform);
+ ushl(vform, dst, src1, temp, false);
+ return dst;
+}
LogicVRegister Simulator::neg(VectorFormat vform,
LogicVRegister dst,
@@ -2178,11 +2022,12 @@ LogicVRegister Simulator::neg(VectorFormat vform,
LogicVRegister Simulator::suqadd(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src) {
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- int64_t sa = dst.IntLeftJustified(vform, i);
- uint64_t ub = src.UintLeftJustified(vform, i);
+ int64_t sa = src1.IntLeftJustified(vform, i);
+ uint64_t ub = src2.UintLeftJustified(vform, i);
uint64_t ur = sa + ub;
int64_t sr;
@@ -2190,7 +2035,7 @@ LogicVRegister Simulator::suqadd(VectorFormat vform,
if (sr < sa) { // Test for signed positive saturation.
dst.SetInt(vform, i, MaxIntFromFormat(vform));
} else {
- dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
+ dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
}
}
return dst;
@@ -2199,11 +2044,12 @@ LogicVRegister Simulator::suqadd(VectorFormat vform,
LogicVRegister Simulator::usqadd(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src) {
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- uint64_t ua = dst.UintLeftJustified(vform, i);
- int64_t sb = src.IntLeftJustified(vform, i);
+ uint64_t ua = src1.UintLeftJustified(vform, i);
+ int64_t sb = src2.IntLeftJustified(vform, i);
uint64_t ur = ua + sb;
if ((sb > 0) && (ur <= ua)) {
@@ -2211,7 +2057,7 @@ LogicVRegister Simulator::usqadd(VectorFormat vform,
} else if ((sb < 0) && (ur >= ua)) {
dst.SetUint(vform, i, 0); // Negative saturation.
} else {
- dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
+ dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
}
}
return dst;
@@ -2341,55 +2187,15 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
const LogicVRegister& src,
bool src_is_signed) {
bool upperhalf = false;
- VectorFormat srcform = kFormatUndefined;
- int64_t ssrc[8];
- uint64_t usrc[8];
-
- switch (dstform) {
- case kFormat8B:
- upperhalf = false;
- srcform = kFormat8H;
- break;
- case kFormat16B:
- upperhalf = true;
- srcform = kFormat8H;
- break;
- case kFormat4H:
- upperhalf = false;
- srcform = kFormat4S;
- break;
- case kFormat8H:
- upperhalf = true;
- srcform = kFormat4S;
- break;
- case kFormat2S:
- upperhalf = false;
- srcform = kFormat2D;
- break;
- case kFormat4S:
- upperhalf = true;
- srcform = kFormat2D;
- break;
- case kFormatB:
- upperhalf = false;
- srcform = kFormatH;
- break;
- case kFormatH:
- upperhalf = false;
- srcform = kFormatS;
- break;
- case kFormatS:
- upperhalf = false;
- srcform = kFormatD;
- break;
- default:
- VIXL_UNIMPLEMENTED();
+ VectorFormat srcform = dstform;
+ if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
+ (dstform == kFormat4S)) {
+ upperhalf = true;
+ srcform = VectorFormatHalfLanes(srcform);
}
+ srcform = VectorFormatDoubleWidth(srcform);
- for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
- ssrc[i] = src.Int(srcform, i);
- usrc[i] = src.Uint(srcform, i);
- }
+ LogicVRegister src_copy = src;
int offset;
if (upperhalf) {
@@ -2400,31 +2206,34 @@ LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
}
for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
+ int64_t ssrc = src_copy.Int(srcform, i);
+ uint64_t usrc = src_copy.Uint(srcform, i);
+
// Test for signed saturation
- if (ssrc[i] > MaxIntFromFormat(dstform)) {
+ if (ssrc > MaxIntFromFormat(dstform)) {
dst.SetSignedSat(offset + i, true);
- } else if (ssrc[i] < MinIntFromFormat(dstform)) {
+ } else if (ssrc < MinIntFromFormat(dstform)) {
dst.SetSignedSat(offset + i, false);
}
// Test for unsigned saturation
if (src_is_signed) {
- if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
+ if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
dst.SetUnsignedSat(offset + i, true);
- } else if (ssrc[i] < 0) {
+ } else if (ssrc < 0) {
dst.SetUnsignedSat(offset + i, false);
}
} else {
- if (usrc[i] > MaxUintFromFormat(dstform)) {
+ if (usrc > MaxUintFromFormat(dstform)) {
dst.SetUnsignedSat(offset + i, true);
}
}
int64_t result;
if (src_is_signed) {
- result = ssrc[i] & MaxUintFromFormat(dstform);
+ result = ssrc & MaxUintFromFormat(dstform);
} else {
- result = usrc[i] & MaxUintFromFormat(dstform);
+ result = usrc & MaxUintFromFormat(dstform);
}
if (dst_is_signed) {
@@ -2565,7 +2374,7 @@ LogicVRegister Simulator::rev_byte(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int rev_size) {
- uint64_t result[kZRegMaxSizeInBytes];
+ uint64_t result[kZRegMaxSizeInBytes] = {};
int lane_count = LaneCountFromFormat(vform);
int lane_size = LaneSizeInBytesFromFormat(vform);
int lanes_per_loop = rev_size / lane_size;
@@ -2602,17 +2411,15 @@ LogicVRegister Simulator::rev64(VectorFormat vform,
return rev_byte(vform, dst, src, 8);
}
-
LogicVRegister Simulator::addlp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
bool is_signed,
bool do_accumulate) {
VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
- VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
- VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
- uint64_t result[8];
+ uint64_t result[kZRegMaxSizeInBytes];
int lane_count = LaneCountFromFormat(vform);
for (int i = 0; i < lane_count; i++) {
if (is_signed) {
@@ -2662,13 +2469,24 @@ LogicVRegister Simulator::uadalp(VectorFormat vform,
return addlp(vform, dst, src, false, true);
}
+LogicVRegister Simulator::ror(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rotation) {
+ int width = LaneSizeInBitsFromFormat(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t value = src.Uint(vform, i);
+ dst.SetUint(vform, i, RotateRight(value, rotation, width));
+ }
+ return dst;
+}
LogicVRegister Simulator::ext(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index) {
- uint8_t result[kZRegMaxSizeInBytes];
+ uint8_t result[kZRegMaxSizeInBytes] = {};
int lane_count = LaneCountFromFormat(vform);
for (int i = 0; i < lane_count - index; ++i) {
result[i] = src1.Uint(vform, i + index);
@@ -2683,6 +2501,17 @@ LogicVRegister Simulator::ext(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int index) {
+ if (index < 0) index += LaneCountFromFormat(vform);
+ VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
+ index *= LaneSizeInBytesFromFormat(vform);
+ return ext(kFormatVnB, dst, src, src, index);
+}
+
+
template <typename T>
LogicVRegister Simulator::fadda(VectorFormat vform,
LogicVRegister acc,
@@ -2869,21 +2698,198 @@ LogicVRegister Simulator::fcmla(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::cadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot,
+ bool saturate) {
+ SimVRegister src1_r, src1_i;
+ SimVRegister src2_r, src2_i;
+ SimVRegister zero;
+ zero.Clear();
+ uzp1(vform, src1_r, src1, zero);
+ uzp2(vform, src1_i, src1, zero);
+ uzp1(vform, src2_r, src2, zero);
+ uzp2(vform, src2_i, src2, zero);
+
+ if (rot == 90) {
+ if (saturate) {
+ sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
+ add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
+ } else {
+ sub(vform, src1_r, src1_r, src2_i);
+ add(vform, src1_i, src1_i, src2_r);
+ }
+ } else {
+ VIXL_ASSERT(rot == 270);
+ if (saturate) {
+ add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
+ sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
+ } else {
+ add(vform, src1_r, src1_r, src2_i);
+ sub(vform, src1_i, src1_i, src2_r);
+ }
+ }
+
+ zip1(vform, dst, src1_r, src1_i);
+ return dst;
+}
+
+LogicVRegister Simulator::cmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot) {
+ SimVRegister src1_a;
+ SimVRegister src2_a, src2_b;
+ SimVRegister srca_i, srca_r;
+ SimVRegister zero, temp;
+ zero.Clear();
+
+ if ((rot == 0) || (rot == 180)) {
+ uzp1(vform, src1_a, src1, zero);
+ uzp1(vform, src2_a, src2, zero);
+ uzp2(vform, src2_b, src2, zero);
+ } else {
+ uzp2(vform, src1_a, src1, zero);
+ uzp2(vform, src2_a, src2, zero);
+ uzp1(vform, src2_b, src2, zero);
+ }
+
+ uzp1(vform, srca_r, srca, zero);
+ uzp2(vform, srca_i, srca, zero);
+
+ bool sub_r = (rot == 90) || (rot == 180);
+ bool sub_i = (rot == 180) || (rot == 270);
+
+ mul(vform, temp, src1_a, src2_a);
+ if (sub_r) {
+ sub(vform, srca_r, srca_r, temp);
+ } else {
+ add(vform, srca_r, srca_r, temp);
+ }
+
+ mul(vform, temp, src1_a, src2_b);
+ if (sub_i) {
+ sub(vform, srca_i, srca_i, temp);
+ } else {
+ add(vform, srca_i, srca_i, temp);
+ }
+
+ zip1(vform, dst, srca_r, srca_i);
+ return dst;
+}
+
+LogicVRegister Simulator::cmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index,
+ int rot) {
+ SimVRegister temp;
+ dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
+ return cmla(vform, dst, srca, src1, temp, rot);
+}
+
+LogicVRegister Simulator::bgrp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool do_bext) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t value = src1.Uint(vform, i);
+ uint64_t mask = src2.Uint(vform, i);
+ int high_pos = 0;
+ int low_pos = 0;
+ uint64_t result_high = 0;
+ uint64_t result_low = 0;
+ for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
+ if ((mask & 1) == 0) {
+ result_high |= (value & 1) << high_pos;
+ high_pos++;
+ } else {
+ result_low |= (value & 1) << low_pos;
+ low_pos++;
+ }
+ mask >>= 1;
+ value >>= 1;
+ }
+
+ if (!do_bext) {
+ result_low |= result_high << low_pos;
+ }
+
+ dst.SetUint(vform, i, result_low);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::bdep(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t value = src1.Uint(vform, i);
+ uint64_t mask = src2.Uint(vform, i);
+ uint64_t result = 0;
+ for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
+ if ((mask & 1) == 1) {
+ result |= (value & 1) << j;
+ value >>= 1;
+ }
+ mask >>= 1;
+ }
+ dst.SetUint(vform, i, result);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::histogram(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool do_segmented) {
+ int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
+ uint64_t result[kZRegMaxSizeInBytes];
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t count = 0;
+ uint64_t value = src1.Uint(vform, i);
+
+ int segment = do_segmented ? (i / elements_per_segment) : 0;
+ int segment_offset = segment * elements_per_segment;
+ int hist_limit = do_segmented ? elements_per_segment : (i + 1);
+ for (int j = 0; j < hist_limit; j++) {
+ if (pg.IsActive(vform, j) &&
+ (value == src2.Uint(vform, j + segment_offset))) {
+ count++;
+ }
+ }
+ result[i] = count;
+ }
+ dst.SetUintArray(vform, result);
+ return dst;
+}
LogicVRegister Simulator::dup_element(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
int src_index) {
- if (vform == kFormatVnQ) {
- // When duplicating a 128-bit value, split it into two 64-bit parts, and
- // then copy the two to their slots on destination register.
- uint64_t low = src.Uint(kFormatVnD, src_index * 2);
- uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1);
- dst.ClearForWrite(vform);
- for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD);
- d_lane += 2) {
- dst.SetUint(kFormatVnD, d_lane, low);
- dst.SetUint(kFormatVnD, d_lane + 1, high);
+ if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
+ // When duplicating an element larger than 64 bits, split the element into
+ // 64-bit parts, and duplicate the parts across the destination.
+ uint64_t d[4];
+ int count = (vform == kFormatVnQ) ? 2 : 4;
+ for (int i = 0; i < count; i++) {
+ d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
+ }
+ dst.Clear();
+ for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
+ dst.SetUint(kFormatVnD, i, d[i % count]);
}
} else {
int lane_count = LaneCountFromFormat(vform);
@@ -2920,6 +2926,16 @@ LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::dup_elements_to_segments(
+ VectorFormat vform,
+ LogicVRegister dst,
+ const std::pair<int, int>& src_and_index) {
+ return dup_elements_to_segments(vform,
+ dst,
+ ReadVRegister(src_and_index.first),
+ src_and_index.second);
+}
+
LogicVRegister Simulator::dup_immediate(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
@@ -3008,7 +3024,6 @@ LogicVRegister Simulator::mov_merging(VectorFormat vform,
return sel(vform, dst, pg, src, dst);
}
-
LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
LogicVRegister dst,
const SimPRegister& pg,
@@ -3018,6 +3033,16 @@ LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
return sel(vform, dst, pg, src, zero);
}
+LogicVRegister Simulator::mov_alternating(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int start_at) {
+ VIXL_ASSERT((start_at == 0) || (start_at == 1));
+ for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
+ dst.SetUint(vform, i, src.Uint(vform, i));
+ }
+ return dst;
+}
LogicPRegister Simulator::mov_merging(LogicPRegister dst,
const LogicPRegister& pg,
@@ -3025,7 +3050,6 @@ LogicPRegister Simulator::mov_merging(LogicPRegister dst,
return sel(dst, pg, src, dst);
}
-
LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src) {
@@ -3033,7 +3057,6 @@ LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
return sel(dst, pg, src, pfalse(all_false));
}
-
LogicVRegister Simulator::movi(VectorFormat vform,
LogicVRegister dst,
uint64_t imm) {
@@ -3077,12 +3100,15 @@ LogicVRegister Simulator::orr(VectorFormat vform,
LogicVRegister Simulator::uxtl(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src) {
+ const LogicVRegister& src,
+ bool is_2) {
VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ int lane_count = LaneCountFromFormat(vform);
+ int src_offset = is_2 ? lane_count : 0;
dst.ClearForWrite(vform);
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- dst.SetUint(vform, i, src.Uint(vform_half, i));
+ for (int i = 0; i < lane_count; i++) {
+ dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
}
return dst;
}
@@ -3090,12 +3116,15 @@ LogicVRegister Simulator::uxtl(VectorFormat vform,
LogicVRegister Simulator::sxtl(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src) {
+ const LogicVRegister& src,
+ bool is_2) {
VectorFormat vform_half = VectorFormatHalfWidth(vform);
+ int lane_count = LaneCountFromFormat(vform);
+ int src_offset = is_2 ? lane_count : 0;
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- dst.SetInt(vform, i, src.Int(vform_half, i));
+ dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
}
return dst;
}
@@ -3104,28 +3133,14 @@ LogicVRegister Simulator::sxtl(VectorFormat vform,
LogicVRegister Simulator::uxtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- VectorFormat vform_half = VectorFormatHalfWidth(vform);
- int lane_count = LaneCountFromFormat(vform);
-
- dst.ClearForWrite(vform);
- for (int i = 0; i < lane_count; i++) {
- dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
- }
- return dst;
+ return uxtl(vform, dst, src, /* is_2 = */ true);
}
LogicVRegister Simulator::sxtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- VectorFormat vform_half = VectorFormatHalfWidth(vform);
- int lane_count = LaneCountFromFormat(vform);
-
- dst.ClearForWrite(vform);
- for (int i = 0; i < lane_count; i++) {
- dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
- }
- return dst;
+ return sxtl(vform, dst, src, /* is_2 = */ true);
}
@@ -3209,23 +3224,6 @@ LogicVRegister Simulator::rshrn2(VectorFormat vform,
LogicVRegister Simulator::Table(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& tab,
- const LogicVRegister& ind) {
- VIXL_ASSERT(IsSVEFormat(vform));
- int lane_count = LaneCountFromFormat(vform);
- for (int i = 0; i < lane_count; i++) {
- uint64_t index = ind.Uint(vform, i);
- uint64_t value = (index >= static_cast<uint64_t>(lane_count))
- ? 0
- : tab.Uint(vform, static_cast<int>(index));
- dst.SetUint(vform, i, value);
- }
- return dst;
-}
-
-
-LogicVRegister Simulator::Table(VectorFormat vform,
- LogicVRegister dst,
const LogicVRegister& ind,
bool zero_out_of_bounds,
const LogicVRegister* tab1,
@@ -3233,24 +3231,30 @@ LogicVRegister Simulator::Table(VectorFormat vform,
const LogicVRegister* tab3,
const LogicVRegister* tab4) {
VIXL_ASSERT(tab1 != NULL);
- const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
- uint64_t result[kMaxLanesPerVector];
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
- }
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- uint64_t j = ind.Uint(vform, i);
- int tab_idx = static_cast<int>(j >> 4);
- int j_idx = static_cast<int>(j & 15);
- if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
- result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
- }
+ int lane_count = LaneCountFromFormat(vform);
+ VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
+ uint64_t table[kZRegMaxSizeInBytes * 2];
+ uint64_t result[kZRegMaxSizeInBytes];
+
+ // For Neon, the table source registers are always 16B, and Neon allows only
+ // 8B or 16B vform for the destination, so infer the table format from the
+ // destination.
+ VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
+
+ uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
+ if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
+ if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
+ if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
+
+ for (int i = 0; i < lane_count; i++) {
+ uint64_t index = ind.Uint(vform, i);
+ result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
+ if (index < tab_size) result[i] = table[index];
}
dst.SetUintArray(vform, result);
return dst;
}
-
LogicVRegister Simulator::tbl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& tab,
@@ -3738,10 +3742,11 @@ LogicVRegister Simulator::sabdl2(VectorFormat vform,
LogicVRegister Simulator::umull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp1, temp2;
- uxtl(vform, temp1, src1);
- uxtl(vform, temp2, src2);
+ uxtl(vform, temp1, src1, is_2);
+ uxtl(vform, temp2, src2, is_2);
mul(vform, dst, temp1, temp2);
return dst;
}
@@ -3751,21 +3756,18 @@ LogicVRegister Simulator::umull2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp1, temp2;
- uxtl2(vform, temp1, src1);
- uxtl2(vform, temp2, src2);
- mul(vform, dst, temp1, temp2);
- return dst;
+ return umull(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::smull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp1, temp2;
- sxtl(vform, temp1, src1);
- sxtl(vform, temp2, src2);
+ sxtl(vform, temp1, src1, is_2);
+ sxtl(vform, temp2, src2, is_2);
mul(vform, dst, temp1, temp2);
return dst;
}
@@ -3775,21 +3777,18 @@ LogicVRegister Simulator::smull2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp1, temp2;
- sxtl2(vform, temp1, src1);
- sxtl2(vform, temp2, src2);
- mul(vform, dst, temp1, temp2);
- return dst;
+ return smull(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::umlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp1, temp2;
- uxtl(vform, temp1, src1);
- uxtl(vform, temp2, src2);
+ uxtl(vform, temp1, src1, is_2);
+ uxtl(vform, temp2, src2, is_2);
mls(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3799,21 +3798,18 @@ LogicVRegister Simulator::umlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp1, temp2;
- uxtl2(vform, temp1, src1);
- uxtl2(vform, temp2, src2);
- mls(vform, dst, dst, temp1, temp2);
- return dst;
+ return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::smlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp1, temp2;
- sxtl(vform, temp1, src1);
- sxtl(vform, temp2, src2);
+ sxtl(vform, temp1, src1, is_2);
+ sxtl(vform, temp2, src2, is_2);
mls(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3823,21 +3819,18 @@ LogicVRegister Simulator::smlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp1, temp2;
- sxtl2(vform, temp1, src1);
- sxtl2(vform, temp2, src2);
- mls(vform, dst, dst, temp1, temp2);
- return dst;
+ return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::umlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp1, temp2;
- uxtl(vform, temp1, src1);
- uxtl(vform, temp2, src2);
+ uxtl(vform, temp1, src1, is_2);
+ uxtl(vform, temp2, src2, is_2);
mla(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3847,21 +3840,18 @@ LogicVRegister Simulator::umlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp1, temp2;
- uxtl2(vform, temp1, src1);
- uxtl2(vform, temp2, src2);
- mla(vform, dst, dst, temp1, temp2);
- return dst;
+ return umlal(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::smlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp1, temp2;
- sxtl(vform, temp1, src1);
- sxtl(vform, temp2, src2);
+ sxtl(vform, temp1, src1, is_2);
+ sxtl(vform, temp2, src2, is_2);
mla(vform, dst, dst, temp1, temp2);
return dst;
}
@@ -3871,20 +3861,17 @@ LogicVRegister Simulator::smlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp1, temp2;
- sxtl2(vform, temp1, src1);
- sxtl2(vform, temp2, src2);
- mla(vform, dst, dst, temp1, temp2);
- return dst;
+ return smlal(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp;
- LogicVRegister product = sqdmull(vform, temp, src1, src2);
+ LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
return add(vform, dst, dst, product).SignedSaturate(vform);
}
@@ -3893,18 +3880,17 @@ LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp;
- LogicVRegister product = sqdmull2(vform, temp, src1, src2);
- return add(vform, dst, dst, product).SignedSaturate(vform);
+ return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp;
- LogicVRegister product = sqdmull(vform, temp, src1, src2);
+ LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
return sub(vform, dst, dst, product).SignedSaturate(vform);
}
@@ -3913,18 +3899,17 @@ LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp;
- LogicVRegister product = sqdmull2(vform, temp, src1, src2);
- return sub(vform, dst, dst, product).SignedSaturate(vform);
+ return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
}
LogicVRegister Simulator::sqdmull(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2) {
+ const LogicVRegister& src2,
+ bool is_2) {
SimVRegister temp;
- LogicVRegister product = smull(vform, temp, src1, src2);
+ LogicVRegister product = smull(vform, temp, src1, src2, is_2);
return add(vform, dst, product, product).SignedSaturate(vform);
}
@@ -3933,38 +3918,49 @@ LogicVRegister Simulator::sqdmull2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- SimVRegister temp;
- LogicVRegister product = smull2(vform, temp, src1, src2);
- return add(vform, dst, product, product).SignedSaturate(vform);
+ return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
}
-
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool round) {
- // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
- // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
- // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
-
int esize = LaneSizeInBitsFromFormat(vform);
- int round_const = round ? (1 << (esize - 2)) : 0;
- int64_t product;
+ SimVRegister temp_lo, temp_hi;
+
+ // Compute low and high multiplication results.
+ mul(vform, temp_lo, src1, src2);
+ smulh(vform, temp_hi, src1, src2);
+
+ // Double by shifting high half, and adding in most-significant bit of low
+ // half.
+ shl(vform, temp_hi, temp_hi, 1);
+ usra(vform, temp_hi, temp_lo, esize - 1);
+
+ if (round) {
+ // Add the second (due to doubling) most-significant bit of the low half
+ // into the result.
+ shl(vform, temp_lo, temp_lo, 1);
+ usra(vform, temp_hi, temp_lo, esize - 1);
+ }
+
+ SimPRegister not_sat;
+ LogicPRegister ptemp(not_sat);
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- product = src1.Int(vform, i) * src2.Int(vform, i);
- product += round_const;
- product = product >> (esize - 1);
-
- if (product > MaxIntFromFormat(vform)) {
- product = MaxIntFromFormat(vform);
- } else if (product < MinIntFromFormat(vform)) {
- product = MinIntFromFormat(vform);
+ // Saturation only occurs when src1 = src2 = minimum representable value.
+ // Check this as a special case.
+ ptemp.SetActive(vform, i, true);
+ if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
+ (src2.Int(vform, i) == MinIntFromFormat(vform))) {
+ ptemp.SetActive(vform, i, false);
}
- dst.SetInt(vform, i, product);
+ dst.SetInt(vform, i, MaxIntFromFormat(vform));
}
+
+ mov_merging(vform, dst, not_sat, temp_hi);
return dst;
}
@@ -3973,7 +3969,8 @@ LogicVRegister Simulator::dot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
- bool is_signed) {
+ bool is_src1_signed,
+ bool is_src2_signed) {
VectorFormat quarter_vform =
VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
@@ -3983,11 +3980,14 @@ LogicVRegister Simulator::dot(VectorFormat vform,
int64_t element1, element2;
for (int i = 0; i < 4; i++) {
int index = 4 * e + i;
- if (is_signed) {
+ if (is_src1_signed) {
element1 = src1.Int(quarter_vform, index);
- element2 = src2.Int(quarter_vform, index);
} else {
element1 = src1.Uint(quarter_vform, index);
+ }
+ if (is_src2_signed) {
+ element2 = src2.Int(quarter_vform, index);
+ } else {
element2 = src2.Uint(quarter_vform, index);
}
result += element1 * element2;
@@ -4002,7 +4002,7 @@ LogicVRegister Simulator::sdot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- return dot(vform, dst, src1, src2, true);
+ return dot(vform, dst, src1, src2, true, true);
}
@@ -4010,9 +4010,148 @@ LogicVRegister Simulator::udot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- return dot(vform, dst, src1, src2, false);
+ return dot(vform, dst, src1, src2, false, false);
}
+LogicVRegister Simulator::usdot(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ return dot(vform, dst, src1, src2, false, true);
+}
+
+LogicVRegister Simulator::cdot(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& acc,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot) {
+ VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
+ VectorFormat quarter_vform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
+
+ int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
+ int sel_b = 1 - sel_a;
+ int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t result = acc.Int(vform, i);
+ for (int j = 0; j < 2; j++) {
+ int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
+ int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
+ int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
+ int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
+ result += (r1 * r2) + (sub_i * i1 * i2);
+ }
+ dst.SetInt(vform, i, result);
+ }
+ return dst;
+}
+
+LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot) {
+ SimVRegister src1_a, src1_b;
+ SimVRegister src2_a, src2_b;
+ SimVRegister srca_i, srca_r;
+ SimVRegister zero, temp;
+ zero.Clear();
+
+ if ((rot == 0) || (rot == 180)) {
+ uzp1(vform, src1_a, src1, zero);
+ uzp1(vform, src2_a, src2, zero);
+ uzp2(vform, src2_b, src2, zero);
+ } else {
+ uzp2(vform, src1_a, src1, zero);
+ uzp2(vform, src2_a, src2, zero);
+ uzp1(vform, src2_b, src2, zero);
+ }
+
+ uzp1(vform, srca_r, srca, zero);
+ uzp2(vform, srca_i, srca, zero);
+
+ bool sub_r = (rot == 90) || (rot == 180);
+ bool sub_i = (rot == 180) || (rot == 270);
+
+ const bool round = true;
+ sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
+ sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
+ zip1(vform, dst, srca_r, srca_i);
+ return dst;
+}
+
+LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index,
+ int rot) {
+ SimVRegister temp;
+ dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
+ return sqrdcmlah(vform, dst, srca, src1, temp, rot);
+}
+
+LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool round,
+ bool sub_op) {
+ // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
+ // To avoid this, we use:
+ // ((dst << (esize - 1)) + (src1 * src2) + (1 << (esize - 2))) >> (esize - 1)
+ // which is the same as:
+ // ((dst << esize) + (2 * src1 * src2) + (1 << (esize - 1))) >> esize.
+
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ int esize = kDRegSize;
+ vixl_uint128_t round_const, accum;
+ round_const.first = 0;
+ if (round) {
+ round_const.second = UINT64_C(1) << (esize - 2);
+ } else {
+ round_const.second = 0;
+ }
+
+ dst.ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Shift the whole value left by `esize - 1` bits.
+ accum.first = dst.Int(vform, i) >> 1;
+ accum.second = dst.Int(vform, i) << (esize - 1);
+
+ vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
+
+ if (sub_op) {
+ product = Neg128(product);
+ }
+ accum = Add128(accum, product);
+
+ // Perform rounding.
+ accum = Add128(accum, round_const);
+
+ // Arithmetic shift the whole value right by `esize - 1` bits.
+ accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
+ accum.first = -(accum.first >> (esize - 1));
+
+ // Perform saturation.
+ bool is_pos = (accum.first == 0) ? true : false;
+ if (is_pos &&
+ (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
+ accum.second = MaxIntFromFormat(vform);
+ } else if (!is_pos && (accum.second <
+ static_cast<uint64_t>(MinIntFromFormat(vform)))) {
+ accum.second = MinIntFromFormat(vform);
+ }
+
+ dst.SetInt(vform, i, accum.second);
+ }
+
+ return dst;
+}
LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
LogicVRegister dst,
@@ -4026,6 +4165,10 @@ LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
// which is same as:
// (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
+ if (vform == kFormatVnD) {
+ return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
+ }
+
int esize = LaneSizeInBitsFromFormat(vform);
int round_const = round ? (1 << (esize - 2)) : 0;
int64_t accum;
@@ -4170,7 +4313,7 @@ LogicVRegister Simulator::trn1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[kZRegMaxSizeInBytes];
+ uint64_t result[kZRegMaxSizeInBytes] = {};
int lane_count = LaneCountFromFormat(vform);
int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
@@ -4190,7 +4333,7 @@ LogicVRegister Simulator::trn2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[kZRegMaxSizeInBytes];
+ uint64_t result[kZRegMaxSizeInBytes] = {};
int lane_count = LaneCountFromFormat(vform);
int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
@@ -4210,7 +4353,7 @@ LogicVRegister Simulator::zip1(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[kZRegMaxSizeInBytes];
+ uint64_t result[kZRegMaxSizeInBytes] = {};
int lane_count = LaneCountFromFormat(vform);
int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
@@ -4230,7 +4373,7 @@ LogicVRegister Simulator::zip2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- uint64_t result[kZRegMaxSizeInBytes];
+ uint64_t result[kZRegMaxSizeInBytes] = {};
int lane_count = LaneCountFromFormat(vform);
int pairs = lane_count / 2;
for (int i = 0; i < pairs; ++i) {
@@ -4283,6 +4426,27 @@ LogicVRegister Simulator::uzp2(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ // Interleave the top and bottom half of a vector, i.e., for a vector:
+ //
+ // [ ... | F | D | B | ... | E | C | A ]
+ //
+ // where B is the first element in the top half of the vector, produce a
+ // result vector:
+ //
+ // [ ... | ... | F | E | D | C | B | A ]
+
+ uint64_t result[kZRegMaxSizeInBytes] = {};
+ int lane_count = LaneCountFromFormat(vform);
+ for (int i = 0; i < lane_count; i += 2) {
+ result[i] = src.Uint(vform, i / 2);
+ result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
+ }
+ dst.SetUintArray(vform, result);
+ return dst;
+}
template <typename T>
T Simulator::FPNeg(T op) {
@@ -4322,6 +4486,11 @@ T Simulator::FPSub(T op1, T op2) {
}
}
+template <typename T>
+T Simulator::FPMulNaNs(T op1, T op2) {
+ T result = FPProcessNaNs(op1, op2);
+ return IsNaN(result) ? result : FPMul(op1, op2);
+}
template <typename T>
T Simulator::FPMul(T op1, T op2) {
@@ -4755,7 +4924,9 @@ int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
value = FPRoundInt(value, rmode);
- if (value >= kXMaxInt) {
+ // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
+ // as a result of kXMaxInt not being representable as a double.
+ if (value >= 9223372036854775808.) {
return kXMaxInt;
} else if (value < kXMinInt) {
return kXMinInt;
@@ -4788,7 +4959,9 @@ uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
value = FPRoundInt(value, rmode);
- if (value >= kXMaxUInt) {
+ // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
+ // as a result of kXMaxUInt not being representable as a double.
+ if (value >= 18446744073709551616.) {
return kXMaxUInt;
} else if (value < 0.0) {
return 0;
@@ -5346,6 +5519,9 @@ LogicVRegister Simulator::fsqrt(VectorFormat vform,
uzp1(vform, temp1, src1, src2); \
uzp2(vform, temp2, src1, src2); \
FN(vform, dst, temp1, temp2); \
+ if (IsSVEFormat(vform)) { \
+ interleave_top_bottom(vform, dst, dst); \
+ } \
return dst; \
} \
\
@@ -5628,14 +5804,15 @@ LogicVRegister Simulator::frint(VectorFormat vform,
return dst;
}
-LogicVRegister Simulator::fcvt(VectorFormat vform,
- unsigned dst_data_size_in_bits,
- unsigned src_data_size_in_bits,
+LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
+ VectorFormat src_vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src) {
- VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
- VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+ unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
+ unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
+ VectorFormat vform = SVEFormatFromLaneSizeInBits(
+ std::max(dst_data_size_in_bits, src_data_size_in_bits));
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
if (!pg.IsActive(vform, i)) continue;
@@ -5810,16 +5987,20 @@ LogicVRegister Simulator::fcvtl2(VectorFormat vform,
LogicVRegister Simulator::fcvtn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
+ SimVRegister tmp;
+ LogicVRegister srctmp = mov(kFormat2D, tmp, src);
+ dst.ClearForWrite(vform);
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
dst.SetFloat(i,
- Float16ToRawbits(
- FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
+ Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
+ FPTieEven,
+ ReadDN())));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
+ dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
}
}
return dst;
@@ -5850,10 +6031,19 @@ LogicVRegister Simulator::fcvtn2(VectorFormat vform,
LogicVRegister Simulator::fcvtxn(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
+ SimVRegister tmp;
+ LogicVRegister srctmp = mov(kFormat2D, tmp, src);
+ int input_lane_count = LaneCountFromFormat(vform);
+ if (IsSVEFormat(vform)) {
+ mov(kFormatVnB, tmp, src);
+ input_lane_count /= 2;
+ }
+
dst.ClearForWrite(vform);
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
- for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
+
+ for (int i = 0; i < input_lane_count; i++) {
+ dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
}
return dst;
}
@@ -5874,14 +6064,14 @@ LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
// Based on reference C function recip_sqrt_estimate from ARM ARM.
double Simulator::recip_sqrt_estimate(double a) {
- int q0, q1, s;
+ int quot0, quot1, s;
double r;
if (a < 0.5) {
- q0 = static_cast<int>(a * 512.0);
- r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
+ quot0 = static_cast<int>(a * 512.0);
+ r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
} else {
- q1 = static_cast<int>(a * 256.0);
- r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
+ quot1 = static_cast<int>(a * 256.0);
+ r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
}
s = static_cast<int>(256.0 * r + 0.5);
return static_cast<double>(s) / 256.0;
@@ -6049,7 +6239,6 @@ T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
} else {
uint64_t fraction;
int exp, result_exp;
- uint32_t sign;
if (IsFloat16<T>()) {
sign = Float16Sign(op);
@@ -6285,6 +6474,52 @@ LogicVRegister Simulator::frecpx(VectorFormat vform,
return dst;
}
+LogicVRegister Simulator::flogb(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ double op = 0.0;
+ switch (vform) {
+ case kFormatVnH:
+ op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
+ break;
+ case kFormatVnS:
+ op = src.Float<float>(i);
+ break;
+ case kFormatVnD:
+ op = src.Float<double>(i);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+
+ switch (std::fpclassify(op)) {
+ case FP_INFINITE:
+ dst.SetInt(vform, i, MaxIntFromFormat(vform));
+ break;
+ case FP_NAN:
+ case FP_ZERO:
+ dst.SetInt(vform, i, MinIntFromFormat(vform));
+ break;
+ case FP_SUBNORMAL: {
+ // DoubleMantissa returns the mantissa of its input, leaving 12 zero
+ // bits where the sign and exponent would be. We subtract 12 to
+ // find the number of leading zero bits in the mantissa itself.
+ int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
+ // Log2 of a subnormal is the lowest exponent a normal number can
+ // represent, together with the zeros in the mantissa.
+ dst.SetInt(vform, i, -1023 - mant_zero_count);
+ break;
+ }
+ case FP_NORMAL:
+ // Log2 of a normal number is the exponent minus the bias.
+ dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
+ break;
+ }
+ }
+ return dst;
+}
+
LogicVRegister Simulator::ftsmul(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -6543,8 +6778,8 @@ LogicVRegister Simulator::fscale(VectorFormat vform,
const LogicVRegister& src2) {
T two = T(2.0);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- T s1 = src1.Float<T>(i);
- if (!IsNaN(s1)) {
+ T src1_val = src1.Float<T>(i);
+ if (!IsNaN(src1_val)) {
int64_t scale = src2.Int(vform, i);
// TODO: this is a low-performance implementation, but it's simple and
// less likely to be buggy. Consider replacing it with something faster.
@@ -6553,19 +6788,19 @@ LogicVRegister Simulator::fscale(VectorFormat vform,
// point iterating further.
scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
- // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and
+ // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
// decrement scale until it's zero.
while (scale-- > 0) {
- s1 = FPMul(s1, two);
+ src1_val = FPMul(src1_val, two);
}
// If scale is negative, divide by two and increment scale until it's
// zero. Initially, scale is (src2 - 1), so we pre-increment.
while (++scale < 0) {
- s1 = FPDiv(s1, two);
+ src1_val = FPDiv(src1_val, two);
}
}
- dst.SetFloat<T>(i, s1);
+ dst.SetFloat<T>(i, src1_val);
}
return dst;
}
@@ -6595,6 +6830,7 @@ LogicVRegister Simulator::scvtf(VectorFormat vform,
int fbits) {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+ dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
if (!pg.IsActive(vform, i)) continue;
@@ -6653,6 +6889,7 @@ LogicVRegister Simulator::ucvtf(VectorFormat vform,
int fbits) {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
+ dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
if (!pg.IsActive(vform, i)) continue;
@@ -6883,7 +7120,7 @@ LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
uint64_t op1 = zn.Uint(vform, i);
uint64_t op2 = zm.Uint(vform, i);
- uint64_t result;
+ uint64_t result = 0;
switch (logical_op) {
case AND:
result = op1 & op2;
@@ -6898,7 +7135,6 @@ LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
result = op1 | op2;
break;
default:
- result = 0;
VIXL_UNIMPLEMENTED();
}
zd.SetUint(vform, i, result);
@@ -6914,7 +7150,7 @@ LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
for (int i = 0; i < pn.GetChunkCount(); i++) {
LogicPRegister::ChunkType op1 = pn.GetChunk(i);
LogicPRegister::ChunkType op2 = pm.GetChunk(i);
- LogicPRegister::ChunkType result;
+ LogicPRegister::ChunkType result = 0;
switch (op) {
case ANDS_p_p_pp_z:
case AND_p_p_pp_z:
@@ -6945,7 +7181,6 @@ LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
result = op1 | op2;
break;
default:
- result = 0;
VIXL_UNIMPLEMENTED();
}
pd.SetChunk(i, result);
@@ -6960,7 +7195,7 @@ LogicVRegister Simulator::SVEBitwiseImmHelper(
uint64_t imm) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
uint64_t op1 = zd.Uint(vform, i);
- uint64_t result;
+ uint64_t result = 0;
switch (op) {
case AND_z_zi:
result = op1 & imm;
@@ -6972,7 +7207,6 @@ LogicVRegister Simulator::SVEBitwiseImmHelper(
result = op1 | imm;
break;
default:
- result = 0;
VIXL_UNIMPLEMENTED();
}
zd.SetUint(vform, i, result);
@@ -7363,10 +7597,39 @@ int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
case SVE_ALL:
return all;
}
- // Unnamed cases archicturally return 0.
+ // Unnamed cases architecturally return 0.
return 0;
}
+LogicPRegister Simulator::match(VectorFormat vform,
+ LogicPRegister dst,
+ const LogicVRegister& haystack,
+ const LogicVRegister& needles,
+ bool negate_match) {
+ SimVRegister ztemp;
+ SimPRegister ptemp;
+
+ pfalse(dst);
+ int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
+ for (int i = 0; i < lanes_per_segment; i++) {
+ dup_elements_to_segments(vform, ztemp, needles, i);
+ SVEIntCompareVectorsHelper(eq,
+ vform,
+ ptemp,
+ GetPTrue(),
+ haystack,
+ ztemp,
+ false,
+ LeaveFlags);
+ SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
+ }
+ if (negate_match) {
+ ptrue(vform, ptemp, SVE_ALL);
+ SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
+ }
+ return dst;
+}
+
uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
if (IsContiguous()) {
return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
@@ -7412,6 +7675,160 @@ uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
return base_ + (offset << vector_shift_);
}
+LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ SimVRegister zero;
+ zero.Clear();
+ return uzp2(vform, dst, src, zero);
+}
+
+LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src) {
+ SimVRegister zero;
+ zero.Clear();
+ return uzp1(vform, dst, src, zero);
+}
+
+LogicVRegister Simulator::adcl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool top) {
+ unsigned reg_size = LaneSizeInBitsFromFormat(vform);
+ VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
+
+ for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
+ uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
+ uint64_t right = dst.Uint(vform, i);
+ unsigned carry_in = src2.Uint(vform, i + 1) & 1;
+ std::pair<uint64_t, uint8_t> val_and_flags =
+ AddWithCarry(reg_size, left, right, carry_in);
+
+ // Set even lanes to the result of the addition.
+ dst.SetUint(vform, i, val_and_flags.first);
+
+ // Set odd lanes to the carry flag from the addition.
+ uint64_t carry_out = (val_and_flags.second >> 1) & 1;
+ dst.SetUint(vform, i + 1, carry_out);
+ }
+ return dst;
+}
+
+// Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
+// the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
+//
+// Matrices of the form:
+//
+// src1 = ( a b c d e f g h ) src2 = ( A B )
+// ( i j k l m n o p ) ( C D )
+// ( E F )
+// ( G H )
+// ( I J )
+// ( K L )
+// ( M N )
+// ( O P )
+//
+// Are stored in the input vector registers as:
+//
+// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+// src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
+// src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
+//
+LogicVRegister Simulator::matmul(VectorFormat vform_dst,
+ LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool src1_signed,
+ bool src2_signed) {
+ // Two destination forms are supported: Q register containing four S-sized
+ // elements (4S) and Z register containing n S-sized elements (VnS).
+ VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
+ VectorFormat vform_src = kFormatVnB;
+ int b_per_segment = kQRegSize / kBRegSize;
+ int s_per_segment = kQRegSize / kSRegSize;
+ int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
+ int segment_count = LaneCountFromFormat(vform_dst) / 4;
+ for (int seg = 0; seg < segment_count; seg++) {
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ int dstidx = (2 * i) + j + (seg * s_per_segment);
+ int64_t sum = srcdst.Int(vform_dst, dstidx);
+ for (int k = 0; k < 8; k++) {
+ int idx1 = (8 * i) + k + (seg * b_per_segment);
+ int idx2 = (8 * j) + k + (seg * b_per_segment);
+ int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
+ : src1.Uint(vform_src, idx1);
+ int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
+ : src2.Uint(vform_src, idx2);
+ sum += e1 * e2;
+ }
+ result[dstidx] = sum;
+ }
+ }
+ }
+ srcdst.SetIntArray(vform_dst, result);
+ return srcdst;
+}
+
+// Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
+// result to the matrix in srcdst, and write back to srcdst.
+//
+// Matrices of the form:
+//
+// src1 = ( a b ) src2 = ( A B )
+// ( c d ) ( C D )
+//
+// Are stored in the input vector registers as:
+//
+// 3 2 1 0
+// src1 = [ d | c | b | a ]
+// src2 = [ D | B | C | A ]
+//
+template <typename T>
+LogicVRegister Simulator::fmatmul(VectorFormat vform,
+ LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ T result[kZRegMaxSizeInBytes / sizeof(T)];
+ int T_per_segment = 4;
+ int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
+ for (int seg = 0; seg < segment_count; seg++) {
+ int segoff = seg * T_per_segment;
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
+ src2.Float<T>(2 * j + 0 + segoff));
+ T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
+ src2.Float<T>(2 * j + 1 + segoff));
+ T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
+ result[2 * i + j + segoff] = FPAdd(sum, prod1);
+ }
+ }
+ }
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ // Elements outside a multiple of 4T are set to zero. This happens only
+ // for double precision operations, when the VL is a multiple of 128 bits,
+ // but not a multiple of 256 bits.
+ T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
+ srcdst.SetFloat<T>(vform, i, value);
+ }
+ return srcdst;
+}
+
+LogicVRegister Simulator::fmatmul(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ fmatmul<float>(vform, dst, src1, src2);
+ } else {
+ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+ fmatmul<double>(vform, dst, src1, src2);
+ }
+ return dst;
+}
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index 56c6eaf6..e18f846a 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -133,14 +133,14 @@ void LiteralPool::Emit(EmitOption option) {
masm_->SetAllowMacroInstructions(false);
#endif
if (option == kBranchRequired) {
- ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
+ ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
masm_->b(&end_of_pool);
}
{
// Marker indicating the size of the literal pool in 32-bit words.
VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
- ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
+ ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
masm_->ldr(xzr, static_cast<int>(pool_size / kWRegSizeInBytes));
}
@@ -510,44 +510,6 @@ int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
}
-bool MacroAssembler::OneInstrMoveImmediateHelper(MacroAssembler* masm,
- const Register& dst,
- uint64_t imm) {
- bool emit_code = masm != NULL;
- unsigned n, imm_s, imm_r;
- int reg_size = dst.GetSizeInBits();
-
- if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
- // Immediate can be represented in a move zero instruction. Movz can't write
- // to the stack pointer.
- if (emit_code) {
- masm->movz(dst, imm);
- }
- return true;
- } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
- // Immediate can be represented in a move negative instruction. Movn can't
- // write to the stack pointer.
- if (emit_code) {
- masm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
- }
- return true;
- } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
- // Immediate can be represented in a logical orr instruction.
- VIXL_ASSERT(!dst.IsZero());
- if (emit_code) {
- masm->LogicalImmediate(dst,
- AppropriateZeroRegFor(dst),
- n,
- imm_s,
- imm_r,
- ORR);
- }
- return true;
- }
- return false;
-}
-
-
void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
@@ -1135,11 +1097,15 @@ void MacroAssembler::Movi(const VRegister& vd,
void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
// TODO: Move 128-bit values in a more efficient way.
VIXL_ASSERT(vd.Is128Bits());
- UseScratchRegisterScope temps(this);
Movi(vd.V2D(), lo);
- Register temp = temps.AcquireX();
- Mov(temp, hi);
- Ins(vd.V2D(), 1, temp);
+ if (hi != lo) {
+ UseScratchRegisterScope temps(this);
+ // TODO: Figure out if using a temporary V register to materialise the
+ // immediate is better.
+ Register temp = temps.AcquireX();
+ Mov(temp, hi);
+ Ins(vd.V2D(), 1, temp);
+ }
}
@@ -1946,6 +1912,9 @@ LS_MACRO_LIST(DEFINE_FUNCTION)
void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
const MemOperand& addr,
LoadStoreOp op) {
+ VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePostIndex() ||
+ addr.IsImmediatePreIndex() || addr.IsRegisterOffset());
+
// Worst case is ldr/str pre/post index:
// * 1 instruction for ldr/str
// * up to 4 instructions to materialise the constant
@@ -1966,11 +1935,11 @@ void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
Mov(temp, addr.GetOffset());
LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op);
- } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
+ } else if (addr.IsImmediatePostIndex() && !IsImmLSUnscaled(offset)) {
// Post-index beyond unscaled addressing range.
LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
- } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
+ } else if (addr.IsImmediatePreIndex() && !IsImmLSUnscaled(offset)) {
// Pre-index beyond unscaled addressing range.
Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
@@ -2018,11 +1987,11 @@ void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
Register temp = temps.AcquireSameSizeAs(base);
Add(temp, base, offset);
LoadStorePair(rt, rt2, MemOperand(temp), op);
- } else if (addr.IsPostIndex()) {
+ } else if (addr.IsImmediatePostIndex()) {
LoadStorePair(rt, rt2, MemOperand(base), op);
Add(base, base, offset);
} else {
- VIXL_ASSERT(addr.IsPreIndex());
+ VIXL_ASSERT(addr.IsImmediatePreIndex());
Add(base, base, offset);
LoadStorePair(rt, rt2, MemOperand(base), op);
}
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 8becddbb..b1e9ec5c 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -716,9 +716,6 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
static int MoveImmediateHelper(MacroAssembler* masm,
const Register& rd,
uint64_t imm);
- static bool OneInstrMoveImmediateHelper(MacroAssembler* masm,
- const Register& dst,
- uint64_t imm);
// Logical macros.
@@ -2819,7 +2816,11 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
V(uzp1, Uzp1) \
V(uzp2, Uzp2) \
V(zip1, Zip1) \
- V(zip2, Zip2)
+ V(zip2, Zip2) \
+ V(smmla, Smmla) \
+ V(ummla, Ummla) \
+ V(usmmla, Usmmla) \
+ V(usdot, Usdot)
#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
void MASM(const VRegister& vd, const VRegister& vn, const VRegister& vm) { \
@@ -2971,7 +2972,10 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
V(umlal, Umlal) \
V(umlal2, Umlal2) \
V(umlsl, Umlsl) \
- V(umlsl2, Umlsl2)
+ V(umlsl2, Umlsl2) \
+ V(sudot, Sudot) \
+ V(usdot, Usdot)
+
#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
void MASM(const VRegister& vd, \
@@ -3056,13 +3060,19 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
V(mul, Mul) \
V(orr, Orr) \
V(sabd, Sabd) \
+ V(shadd, Shadd) \
V(smax, Smax) \
- V(smulh, Smulh) \
V(smin, Smin) \
+ V(smulh, Smulh) \
+ V(sqadd, Sqadd) \
+ V(srhadd, Srhadd) \
V(uabd, Uabd) \
+ V(uhadd, Uhadd) \
V(umax, Umax) \
V(umin, Umin) \
- V(umulh, Umulh)
+ V(umulh, Umulh) \
+ V(uqadd, Uqadd) \
+ V(urhadd, Urhadd)
#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
void MASM(const ZRegister& zd, \
@@ -4103,7 +4113,11 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
void Ext(const ZRegister& zd,
const ZRegister& zn,
const ZRegister& zm,
- unsigned offset);
+ unsigned offset) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ext(zd, zn, zm, offset);
+ }
void Fabd(const ZRegister& zd,
const PRegisterM& pg,
const ZRegister& zn,
@@ -4245,15 +4259,12 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
SingleEmissionCheckScope guard(this);
fcmgt(pd, pg, zn, zm);
}
- void Fcmla(const ZRegister& zda,
+ void Fcmla(const ZRegister& zd,
const PRegisterM& pg,
+ const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm,
- int rot) {
- VIXL_ASSERT(allow_macro_instructions_);
- MovprfxHelperScope guard(this, zda, pg, zda);
- fcmla(zda, pg, zn, zm, rot);
- }
+ int rot);
void Fcmla(const ZRegister& zda,
const ZRegister& zn,
const ZRegister& zm,
@@ -4862,6 +4873,18 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
void Ld1rqw(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr);
+ void Ld1rob(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1rod(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1roh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
+ void Ld1row(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr);
void Ld1rsb(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr) {
@@ -6366,6 +6389,1103 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
zip2(zd, zn, zm);
}
+ // SVE2
+ void Adclb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Adclt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ addhnb(zd, zn, zm);
+ }
+ void Addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ addhnt(zd, zn, zm);
+ }
+ void Addp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Bcax(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+ void Bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ bdep(zd, zn, zm);
+ }
+ void Bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ bext(zd, zn, zm);
+ }
+ void Bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ bgrp(zd, zn, zm);
+ }
+ void Bsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+ void Bsl1n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+ void Bsl2n(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+ void Cadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+ void Cdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
+ void Cdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+ void Cmla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
+ void Cmla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+ void Eor3(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+ void Eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ eorbt(zd, zn, zm);
+ }
+ void Eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ eortb(zd, zn, zm);
+ }
+ void Faddp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcvtlt(zd, pg, zn);
+ }
+ void Fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcvtnt(zd, pg, zn);
+ }
+ void Fcvtx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(zn.IsLaneSizeD());
+ MovprfxHelperScope guard(this, zd.VnD(), pg, zd.VnD());
+ fcvtx(zd, pg.Merging(), zn);
+ }
+ void Fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ fcvtxnt(zd, pg, zn);
+ }
+ void Flogb(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zd);
+ flogb(zd, pg.Merging(), zn);
+ }
+ void Fmaxnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fmaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fminnmp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fmlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fmlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fmlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fmlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Fmlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Fmlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Fmlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Fmlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Histcnt(const ZRegister& zd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ histcnt(zd, pg, zn, zm);
+ }
+ void Histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ histseg(zd, zn, zm);
+ }
+ void Ldnt1sb(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnt1sb(zt, pg, addr);
+ }
+ void Ldnt1sh(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnt1sh(zt, pg, addr);
+ }
+ void Ldnt1sw(const ZRegister& zt,
+ const PRegisterZ& pg,
+ const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldnt1sw(zt, pg, addr);
+ }
+ void Match(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ match(pd, pg, zn, zm);
+ }
+ void Mla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Mls(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Mul(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mul(zd, zn, zm, index);
+ }
+ void Mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ mul(zd, zn, zm);
+ }
+ void Nbsl(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ const ZRegister& zk);
+ void Nmatch(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ nmatch(pd, pg, zn, zm);
+ }
+ void Pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ pmul(zd, zn, zm);
+ }
+ void Pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ pmullb(zd, zn, zm);
+ }
+ void Pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ pmullt(zd, zn, zm);
+ }
+ void Raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ raddhnb(zd, zn, zm);
+ }
+ void Raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ raddhnt(zd, zn, zm);
+ }
+ void Rshrnb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rshrnb(zd, zn, shift);
+ }
+ void Rshrnt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rshrnt(zd, zn, shift);
+ }
+ void Rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rsubhnb(zd, zn, zm);
+ }
+ void Rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ rsubhnt(zd, zn, zm);
+ }
+ void Saba(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sabalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sabalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sabdlb(zd, zn, zm);
+ }
+ void Sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sabdlt(zd, zn, zm);
+ }
+ void Sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sadalp(zda, pg, zn);
+ }
+ void Saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ saddlb(zd, zn, zm);
+ }
+ void Saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ saddlbt(zd, zn, zm);
+ }
+ void Saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ saddlt(zd, zn, zm);
+ }
+ void Saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ saddwb(zd, zn, zm);
+ }
+ void Saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ saddwt(zd, zn, zm);
+ }
+ void Sbclb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sbclt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Shrnb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ shrnb(zd, zn, shift);
+ }
+ void Shrnt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ shrnt(zd, zn, shift);
+ }
+ void Shsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sli(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sli(zd, zn, shift);
+ }
+ void Smaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Smlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Smlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Smlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Smlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Smlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Smlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Smlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Smlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ smulh(zd, zn, zm);
+ }
+ void Smullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ smullb(zd, zn, zm, index);
+ }
+ void Smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ smullb(zd, zn, zm);
+ }
+ void Smullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ smullt(zd, zn, zm, index);
+ }
+ void Smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ smullt(zd, zn, zm);
+ }
+ void Sqabs(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zd);
+ sqabs(zd, pg.Merging(), zn);
+ }
+ void Sqcadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+ void Sqdmlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sqdmlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqdmlalbt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqdmlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sqdmlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqdmlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sqdmlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqdmlslbt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqdmlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sqdmlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdmulh(zd, zn, zm, index);
+ }
+ void Sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdmulh(zd, zn, zm);
+ }
+ void Sqdmullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdmullb(zd, zn, zm, index);
+ }
+ void Sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdmullb(zd, zn, zm);
+ }
+ void Sqdmullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdmullt(zd, zn, zm, index);
+ }
+ void Sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqdmullt(zd, zn, zm);
+ }
+ void Sqneg(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zd);
+ sqneg(zd, pg.Merging(), zn);
+ }
+ void Sqrdcmlah(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index,
+ int rot);
+ void Sqrdcmlah(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
+ void Sqrdmlah(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqrdmlah(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sqrdmlsh(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqrdmlsh(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sqrdmulh(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqrdmulh(zd, zn, zm, index);
+ }
+ void Sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqrdmulh(zd, zn, zm);
+ }
+ void Sqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqrshrnb(zd, zn, shift);
+ }
+ void Sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqrshrnt(zd, zn, shift);
+ }
+ void Sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqrshrunb(zd, zn, shift);
+ }
+ void Sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqrshrunt(zd, zn, shift);
+ }
+ void Sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ sqshl(zd, pg, zd, shift);
+ }
+ void Sqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqshlu(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ sqshlu(zd, pg, zd, shift);
+ }
+ void Sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqshrnb(zd, zn, shift);
+ }
+ void Sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqshrnt(zd, zn, shift);
+ }
+ void Sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqshrunb(zd, zn, shift);
+ }
+ void Sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqshrunt(zd, zn, shift);
+ }
+ void Sqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Sqxtnb(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqxtnb(zd, zn);
+ }
+ void Sqxtnt(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqxtnt(zd, zn);
+ }
+ void Sqxtunb(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqxtunb(zd, zn);
+ }
+ void Sqxtunt(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sqxtunt(zd, zn);
+ }
+ void Sri(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sri(zd, zn, shift);
+ }
+ void Srshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Srshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ srshr(zd, pg, zd, shift);
+ }
+ void Srsra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift);
+ void Sshllb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sshllb(zd, zn, shift);
+ }
+ void Sshllt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ sshllt(zd, zn, shift);
+ }
+ void Ssra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift);
+ void Ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ssublb(zd, zn, zm);
+ }
+ void Ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ssublbt(zd, zn, zm);
+ }
+ void Ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ssublt(zd, zn, zm);
+ }
+ void Ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ssubltb(zd, zn, zm);
+ }
+ void Ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ssubwb(zd, zn, zm);
+ }
+ void Ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ssubwt(zd, zn, zm);
+ }
+ void Subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ subhnb(zd, zn, zm);
+ }
+ void Subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ subhnt(zd, zn, zm);
+ }
+ void Suqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Tbl(const ZRegister& zd,
+ const ZRegister& zn1,
+ const ZRegister& zn2,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ tbl(zd, zn1, zn2, zm);
+ }
+ void Tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ tbx(zd, zn, zm);
+ }
+ void Uaba(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Uabalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Uabalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uabdlb(zd, zn, zm);
+ }
+ void Uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uabdlt(zd, zn, zm);
+ }
+ void Uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uadalp(zda, pg, zn);
+ }
+ void Uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uaddlb(zd, zn, zm);
+ }
+ void Uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uaddlt(zd, zn, zm);
+ }
+ void Uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uaddwb(zd, zn, zm);
+ }
+ void Uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uaddwt(zd, zn, zm);
+ }
+ void Uhsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Umaxp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Uminp(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Umlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umlalb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Umlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umlalt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Umlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umlslb(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Umlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Umlslt(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ umulh(zd, zn, zm);
+ }
+ void Umullb(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ umullb(zd, zn, zm, index);
+ }
+ void Umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ umullb(zd, zn, zm);
+ }
+ void Umullt(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ umullt(zd, zn, zm, index);
+ }
+ void Umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ umullt(zd, zn, zm);
+ }
+ void Uqrshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqrshrnb(zd, zn, shift);
+ }
+ void Uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqrshrnt(zd, zn, shift);
+ }
+ void Uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ uqshl(zd, pg, zd, shift);
+ }
+ void Uqshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqshrnb(zd, zn, shift);
+ }
+ void Uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqshrnt(zd, zn, shift);
+ }
+ void Uqsub(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Uqxtnb(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqxtnb(zd, zn);
+ }
+ void Uqxtnt(const ZRegister& zd, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ uqxtnt(zd, zn);
+ }
+ void Urecpe(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zd);
+ urecpe(zd, pg.Merging(), zn);
+ }
+ void Urshl(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Urshr(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zn);
+ urshr(zd, pg, zd, shift);
+ }
+ void Ursqrte(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ MovprfxHelperScope guard(this, zd, pg, zd);
+ ursqrte(zd, pg.Merging(), zn);
+ }
+ void Ursra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift);
+ void Ushllb(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ushllb(zd, zn, shift);
+ }
+ void Ushllt(const ZRegister& zd, const ZRegister& zn, int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ushllt(zd, zn, shift);
+ }
+ void Usqadd(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Usra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift);
+ void Usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ usublb(zd, zn, zm);
+ }
+ void Usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ usublt(zd, zn, zm);
+ }
+ void Usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ usubwb(zd, zn, zm);
+ }
+ void Usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ usubwt(zd, zn, zm);
+ }
+ void Whilege(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilege(pd, rn, rm);
+ }
+ void Whilegt(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilegt(pd, rn, rm);
+ }
+ void Whilehi(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilehi(pd, rn, rm);
+ }
+ void Whilehs(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilehs(pd, rn, rm);
+ }
+ void Whilerw(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilerw(pd, rn, rm);
+ }
+ void Whilewr(const PRegisterWithLaneSize& pd,
+ const Register& rn,
+ const Register& rm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ whilewr(pd, rn, rm);
+ }
+ void Xar(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (zd.Aliases(zm)) {
+ SingleEmissionCheckScope guard(this);
+ xar(zd, zm, zn, shift);
+ } else {
+ MovprfxHelperScope guard(this, zd, zn);
+ xar(zd, zd, zm, shift);
+ }
+ }
+ void Fmmla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Smmla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Ummla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Usmmla(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Usdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Usdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+ void Sudot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index);
+
template <typename T>
Literal<T>* CreateLiteralDestroyedWithPool(T value) {
return new Literal<T>(value,
@@ -6783,7 +7903,7 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
// SVE_MUL_VL). The ratio log2 of VL to memory access size is passed as
// vl_divisor_log2; pass -1 to indicate no dependency.
template <typename Tg, typename Tf>
- void SVELoadStoreScalarImmHelper(
+ void SVELoadStoreNTBroadcastQOHelper(
const ZRegister& zt,
const Tg& pg,
const SVEMemOperand& addr,
@@ -6816,25 +7936,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
int imm,
int shift);
- typedef void (Assembler::*IntArithFn)(const ZRegister& zd,
- const ZRegister& zn,
- const ZRegister& zm);
+ typedef void (Assembler::*Int3ArithFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ typedef void (Assembler::*Int4ArithFn)(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
- typedef void (Assembler::*IntWideImmFn)(const ZRegister& zd,
- const ZRegister& zn,
- int imm);
+ typedef void (Assembler::*IntArithImmFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ int imm);
- typedef void (Assembler::*IntArithIndexFn)(const ZRegister& zd,
- const ZRegister& zn,
- const ZRegister& zm,
- int index);
+ typedef void (Assembler::*ZZZImmFn)(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm);
typedef void (MacroAssembler::*SVEArithPredicatedFn)(const ZRegister& zd,
const PRegisterM& pg,
const ZRegister& zn,
const ZRegister& zm);
- void IntWideImmHelper(IntWideImmFn imm_fn,
+ void IntWideImmHelper(IntArithImmFn imm_fn,
SVEArithPredicatedFn reg_fn,
const ZRegister& zd,
const ZRegister& zn,
@@ -6860,18 +7985,30 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
const ZRegister& zn,
IntegerOperand imm);
- void SVESdotUdotHelper(IntArithFn fn,
+ void AbsoluteDifferenceAccumulate(Int3ArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ void FourRegDestructiveHelper(Int3ArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ void FourRegDestructiveHelper(Int4ArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm);
+
+ void SVEDotIndexHelper(ZZZImmFn fn,
const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
- const ZRegister& zm);
-
- void SVESdotUdotIndexHelper(IntArithIndexFn fn,
- const ZRegister& zd,
- const ZRegister& za,
- const ZRegister& zn,
- const ZRegister& zm,
- int index);
+ const ZRegister& zm,
+ int index);
// For noncommutative arithmetic operations.
void NoncommutativeArithmeticHelper(const ZRegister& zd,
@@ -6915,12 +8052,24 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
const ZRegister& zm,
int index);
- void FPMulAddIndexHelper(SVEMulAddIndexFn fn,
- const ZRegister& zd,
- const ZRegister& za,
- const ZRegister& zn,
- const ZRegister& zm,
- int index);
+ void FourRegOneImmDestructiveHelper(ZZZImmFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm);
+
+ void ShiftRightAccumulate(IntArithImmFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int imm);
+
+ void ComplexAddition(ZZZImmFn fn,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot);
// Tell whether any of the macro instruction can be used. When false the
// MacroAssembler will assert if a method which can emit a variable number
diff --git a/src/aarch64/macro-assembler-sve-aarch64.cc b/src/aarch64/macro-assembler-sve-aarch64.cc
index b107f132..6bf56076 100644
--- a/src/aarch64/macro-assembler-sve-aarch64.cc
+++ b/src/aarch64/macro-assembler-sve-aarch64.cc
@@ -89,7 +89,7 @@ bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
return false;
}
-void MacroAssembler::IntWideImmHelper(IntWideImmFn imm_fn,
+void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn,
SVEArithPredicatedFn reg_macro,
const ZRegister& zd,
const ZRegister& zn,
@@ -130,7 +130,7 @@ void MacroAssembler::Mul(const ZRegister& zd,
const ZRegister& zn,
IntegerOperand imm) {
VIXL_ASSERT(allow_macro_instructions_);
- IntWideImmFn imm_fn = &Assembler::mul;
+ IntArithImmFn imm_fn = &Assembler::mul;
SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}
@@ -140,7 +140,7 @@ void MacroAssembler::Smin(const ZRegister& zd,
IntegerOperand imm) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT(imm.FitsInSignedLane(zd));
- IntWideImmFn imm_fn = &Assembler::smin;
+ IntArithImmFn imm_fn = &Assembler::smin;
SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}
@@ -150,7 +150,7 @@ void MacroAssembler::Smax(const ZRegister& zd,
IntegerOperand imm) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT(imm.FitsInSignedLane(zd));
- IntWideImmFn imm_fn = &Assembler::smax;
+ IntArithImmFn imm_fn = &Assembler::smax;
SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
}
@@ -160,7 +160,7 @@ void MacroAssembler::Umax(const ZRegister& zd,
IntegerOperand imm) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
- IntWideImmFn imm_fn = &Assembler::umax;
+ IntArithImmFn imm_fn = &Assembler::umax;
SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
}
@@ -170,7 +170,7 @@ void MacroAssembler::Umin(const ZRegister& zd,
IntegerOperand imm) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
- IntWideImmFn imm_fn = &Assembler::umin;
+ IntArithImmFn imm_fn = &Assembler::umin;
SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
}
@@ -562,80 +562,143 @@ void MacroAssembler::FPCommutativeArithmeticHelper(
}
}
-void MacroAssembler::Asr(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::asr),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::asrr));
-}
-
-void MacroAssembler::Lsl(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::lsl),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::lslr));
-}
-
-void MacroAssembler::Lsr(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::lsr),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::lsrr));
-}
-
-void MacroAssembler::Fdiv(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::fdiv),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::fdivr));
-}
-
-void MacroAssembler::Fsub(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::fsub),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::fsubr));
-}
+// Instructions of the form "inst zda, zn, zm, #num", where they are
+// non-commutative and no reversed form is provided.
+#define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \
+ V(Cmla, cmla) \
+ V(Sqrdcmlah, sqrdcmlah)
+
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
+ void MacroAssembler::MASMFN(const ZRegister& zd, \
+ const ZRegister& za, \
+ const ZRegister& zn, \
+ const ZRegister& zm, \
+ int imm) { \
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
+ UseScratchRegisterScope temps(this); \
+ VIXL_ASSERT(AreSameLaneSize(zn, zm)); \
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); \
+ Mov(ztmp, zd.Aliases(zn) ? zn : zm); \
+ MovprfxHelperScope guard(this, zd, za); \
+ ASMFN(zd, \
+ (zd.Aliases(zn) ? ztmp : zn), \
+ (zd.Aliases(zm) ? ztmp : zm), \
+ imm); \
+ } else { \
+ MovprfxHelperScope guard(this, zd, za); \
+ ASMFN(zd, zn, zm, imm); \
+ } \
+ }
+VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
+
+// Instructions of the form "inst zda, zn, zm, #num, #num", where they are
+// non-commutative and no reversed form is provided.
+#define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \
+ V(Cmla, cmla) \
+ V(Sqrdcmlah, sqrdcmlah)
+
+// This doesn't handle zm when it's out of the range that can be encoded in
+// the instruction. The range depends on element size: z0-z7 for H, z0-z15 for S.
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
+ void MacroAssembler::MASMFN(const ZRegister& zd, \
+ const ZRegister& za, \
+ const ZRegister& zn, \
+ const ZRegister& zm, \
+ int index, \
+ int rot) { \
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
+ UseScratchRegisterScope temps(this); \
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); \
+ { \
+ MovprfxHelperScope guard(this, ztmp, za); \
+ ASMFN(ztmp, zn, zm, index, rot); \
+ } \
+ Mov(zd, ztmp); \
+ } else { \
+ MovprfxHelperScope guard(this, zd, za); \
+ ASMFN(zd, zn, zm, index, rot); \
+ } \
+ }
+VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
+
+// Instructions of the form "inst zda, pg, zda, zn", where they are
+// non-commutative and no reversed form is provided.
+#define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
+ V(Addp, addp) \
+ V(Faddp, faddp) \
+ V(Fmaxnmp, fmaxnmp) \
+ V(Fminnmp, fminnmp) \
+ V(Fmaxp, fmaxp) \
+ V(Fminp, fminp) \
+ V(Fscale, fscale) \
+ V(Smaxp, smaxp) \
+ V(Sminp, sminp) \
+ V(Suqadd, suqadd) \
+ V(Umaxp, umaxp) \
+ V(Uminp, uminp) \
+ V(Usqadd, usqadd)
+
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
+ void MacroAssembler::MASMFN(const ZRegister& zd, \
+ const PRegisterM& pg, \
+ const ZRegister& zn, \
+ const ZRegister& zm) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ if (zd.Aliases(zm) && !zd.Aliases(zn)) { \
+ UseScratchRegisterScope temps(this); \
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \
+ Mov(scratch, zm); \
+ MovprfxHelperScope guard(this, zd, pg, zn); \
+ ASMFN(zd, pg, zd, scratch); \
+ } else { \
+ MovprfxHelperScope guard(this, zd, pg, zn); \
+ ASMFN(zd, pg, zd, zm); \
+ } \
+ }
+VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
+
+// Instructions of the form "inst zda, pg, zda, zn", where they are
+// non-commutative and a reversed form is provided.
+#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
+ V(Asr, asr) \
+ V(Fdiv, fdiv) \
+ V(Fsub, fsub) \
+ V(Lsl, lsl) \
+ V(Lsr, lsr) \
+ V(Sdiv, sdiv) \
+ V(Shsub, shsub) \
+ V(Sqrshl, sqrshl) \
+ V(Sqshl, sqshl) \
+ V(Sqsub, sqsub) \
+ V(Srshl, srshl) \
+ V(Sub, sub) \
+ V(Udiv, udiv) \
+ V(Uhsub, uhsub) \
+ V(Uqrshl, uqrshl) \
+ V(Uqshl, uqshl) \
+ V(Uqsub, uqsub) \
+ V(Urshl, urshl)
+
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
+ void MacroAssembler::MASMFN(const ZRegister& zd, \
+ const PRegisterM& pg, \
+ const ZRegister& zn, \
+ const ZRegister& zm) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ NoncommutativeArithmeticHelper(zd, \
+ pg, \
+ zn, \
+ zm, \
+ static_cast<SVEArithPredicatedFn>( \
+ &Assembler::ASMFN), \
+ static_cast<SVEArithPredicatedFn>( \
+ &Assembler::ASMFN##r)); \
+ }
+VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
void MacroAssembler::Fadd(const ZRegister& zd,
const PRegisterM& pg,
@@ -828,14 +891,14 @@ void MacroAssembler::Index(const ZRegister& zd,
static IndexOperand Prepare(MacroAssembler* masm,
UseScratchRegisterScope* temps,
const Operand& op,
- const ZRegister& zd) {
+ const ZRegister& zd_inner) {
// Look for encodable immediates.
int imm;
if (op.IsImmediate()) {
- if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd, &imm)) {
+ if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) {
return IndexOperand(imm);
}
- Register scratch = temps->AcquireRegisterToHoldLane(zd);
+ Register scratch = temps->AcquireRegisterToHoldLane(zd_inner);
masm->Mov(scratch, op);
return IndexOperand(scratch);
} else {
@@ -1022,21 +1085,6 @@ void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
VIXL_UNREACHABLE();
}
-void MacroAssembler::Sdiv(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::sdiv),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::sdivr));
-}
-
void MacroAssembler::Sub(const ZRegister& zd,
IntegerOperand imm,
const ZRegister& zm) {
@@ -1058,36 +1106,6 @@ void MacroAssembler::Sub(const ZRegister& zd,
}
}
-void MacroAssembler::Sub(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::sub),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::subr));
-}
-
-void MacroAssembler::Udiv(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- NoncommutativeArithmeticHelper(zd,
- pg,
- zn,
- zm,
- static_cast<SVEArithPredicatedFn>(
- &Assembler::udiv),
- static_cast<SVEArithPredicatedFn>(
- &Assembler::udivr));
-}
-
void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr,
@@ -1135,7 +1153,7 @@ void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
}
template <typename Tg, typename Tf>
-void MacroAssembler::SVELoadStoreScalarImmHelper(
+void MacroAssembler::SVELoadStoreNTBroadcastQOHelper(
const ZRegister& zt,
const Tg& pg,
const SVEMemOperand& addr,
@@ -1157,6 +1175,13 @@ void MacroAssembler::SVELoadStoreScalarImmHelper(
return;
}
+ if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
+ addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zt, pg, addr);
+ return;
+ }
+
if (addr.IsEquivalentToScalar()) {
SingleEmissionCheckScope guard(this);
(this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
@@ -1473,169 +1498,176 @@ void MacroAssembler::Ldff1sw(const ZRegister& zt,
static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
}
-void MacroAssembler::Ld1rqb(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ld1rqb,
- 4,
- 4,
- NO_SVE_OFFSET_MODIFIER,
- -1);
-}
+#define VIXL_SVE_LD1R_LIST(V) \
+ V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5)
+
+#define VIXL_DEFINE_MASM_FUNC(SZ, SH) \
+ void MacroAssembler::Ld1r##SZ(const ZRegister& zt, \
+ const PRegisterZ& pg, \
+ const SVEMemOperand& addr) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SVELoadStoreNTBroadcastQOHelper(zt, \
+ pg, \
+ addr, \
+ &MacroAssembler::ld1r##SZ, \
+ 4, \
+ SH, \
+ NO_SVE_OFFSET_MODIFIER, \
+ -1); \
+ }
-void MacroAssembler::Ld1rqd(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ld1rqd,
- 4,
- 4,
- NO_SVE_OFFSET_MODIFIER,
- -1);
-}
+VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC)
-void MacroAssembler::Ld1rqh(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ld1rqh,
- 4,
- 4,
- NO_SVE_OFFSET_MODIFIER,
- -1);
-}
-
-void MacroAssembler::Ld1rqw(const ZRegister& zt,
- const PRegisterZ& pg,
- const SVEMemOperand& addr) {
- VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ld1rqw,
- 4,
- 4,
- NO_SVE_OFFSET_MODIFIER,
- -1);
-}
+#undef VIXL_DEFINE_MASM_FUNC
+#undef VIXL_SVE_LD1R_LIST
void MacroAssembler::Ldnt1b(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ldnt1b,
- 4,
- 0,
- SVE_MUL_VL);
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ ldnt1b(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1b,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
}
void MacroAssembler::Ldnt1d(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ldnt1d,
- 4,
- 0,
- SVE_MUL_VL);
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ ldnt1d(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1d,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
}
void MacroAssembler::Ldnt1h(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ldnt1h,
- 4,
- 0,
- SVE_MUL_VL);
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ ldnt1h(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1h,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
}
void MacroAssembler::Ldnt1w(const ZRegister& zt,
const PRegisterZ& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::ldnt1w,
- 4,
- 0,
- SVE_MUL_VL);
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ ldnt1w(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::ldnt1w,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
}
void MacroAssembler::Stnt1b(const ZRegister& zt,
const PRegister& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::stnt1b,
- 4,
- 0,
- SVE_MUL_VL);
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ stnt1b(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1b,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
}
void MacroAssembler::Stnt1d(const ZRegister& zt,
const PRegister& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::stnt1d,
- 4,
- 0,
- SVE_MUL_VL);
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ stnt1d(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1d,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
}
void MacroAssembler::Stnt1h(const ZRegister& zt,
const PRegister& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::stnt1h,
- 4,
- 0,
- SVE_MUL_VL);
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ stnt1h(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1h,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
}
void MacroAssembler::Stnt1w(const ZRegister& zt,
const PRegister& pg,
const SVEMemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
- SVELoadStoreScalarImmHelper(zt,
- pg,
- addr,
- &MacroAssembler::stnt1w,
- 4,
- 0,
- SVE_MUL_VL);
-}
-
-void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn,
- const ZRegister& zd,
- const ZRegister& za,
- const ZRegister& zn,
- const ZRegister& zm,
- int index) {
+ if (addr.IsVectorPlusScalar()) {
+ SingleEmissionCheckScope guard(this);
+ stnt1w(zt, pg, addr);
+ } else {
+ SVELoadStoreNTBroadcastQOHelper(zt,
+ pg,
+ addr,
+ &MacroAssembler::stnt1w,
+ 4,
+ 0,
+ SVE_MUL_VL);
+ }
+}
+
+void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
if (zd.Aliases(za)) {
// zda = zda + (zn . zm)
SingleEmissionCheckScope guard(this);
@@ -1660,20 +1692,15 @@ void MacroAssembler::SVESdotUdotIndexHelper(IntArithIndexFn fn,
}
}
-void MacroAssembler::SVESdotUdotHelper(IntArithFn fn,
- const ZRegister& zd,
- const ZRegister& za,
- const ZRegister& zn,
- const ZRegister& zm) {
- if (zd.Aliases(za)) {
- // zda = zda + (zn . zm)
- SingleEmissionCheckScope guard(this);
- (this->*fn)(zd, zn, zm);
-
- } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
- // zdn = za + (zdn . zm)
- // zdm = za + (zn . zdm)
- // zdnm = za + (zdnm . zdnm)
+void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
+ // zd = za . zd . zm
+ // zd = za . zn . zd
+ // zd = za . zd . zd
UseScratchRegisterScope temps(this);
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
{
@@ -1683,61 +1710,256 @@ void MacroAssembler::SVESdotUdotHelper(IntArithFn fn,
Mov(zd, scratch);
} else {
- // zd = za + (zn . zm)
MovprfxHelperScope guard(this, zd, za);
(this->*fn)(zd, zn, zm);
}
}
-void MacroAssembler::Fscale(const ZRegister& zd,
- const PRegisterM& pg,
- const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
+ // zd = za . zd . zm
+ // zd = za . zn . zd
+ // zd = za . zd . zd
UseScratchRegisterScope temps(this);
- ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
- Mov(scratch, zm);
- MovprfxHelperScope guard(this, zd, pg, zn);
- fscale(zd, pg, zd, scratch);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, za);
+ (this->*fn)(scratch, scratch, zn, zm);
+ }
+
+ Mov(zd, scratch);
} else {
- MovprfxHelperScope guard(this, zd, pg, zn);
- fscale(zd, pg, zd, zm);
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zd, zn, zm);
+ }
+}
+
+void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int imm) {
+ if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
+ // zd = za . zd . zm[i]
+ // zd = za . zn . zd[i]
+ // zd = za . zd . zd[i]
+ UseScratchRegisterScope temps(this);
+ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, scratch, za);
+ (this->*fn)(scratch, zn, zm, imm);
+ }
+
+ Mov(zd, scratch);
+ } else {
+ // zd = za . zn . zm[i]
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zn, zm, imm);
+ }
+}
+
+void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ if (zn.Aliases(zm)) {
+ // If zn == zm, the difference is zero.
+ if (!zd.Aliases(za)) {
+ Mov(zd, za);
+ }
+ } else if (zd.Aliases(za)) {
+ SingleEmissionCheckScope guard(this);
+ (this->*fn)(zd, zn, zm);
+ } else if (zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
+ Mov(ztmp, zn);
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, ztmp, zm);
+ } else if (zd.Aliases(zm)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
+ Mov(ztmp, zm);
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zn, ztmp);
+ } else {
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zn, zm);
}
}
+#define VIXL_SVE_4REG_LIST(V) \
+ V(Saba, saba, AbsoluteDifferenceAccumulate) \
+ V(Uaba, uaba, AbsoluteDifferenceAccumulate) \
+ V(Sabalb, sabalb, AbsoluteDifferenceAccumulate) \
+ V(Sabalt, sabalt, AbsoluteDifferenceAccumulate) \
+ V(Uabalb, uabalb, AbsoluteDifferenceAccumulate) \
+ V(Uabalt, uabalt, AbsoluteDifferenceAccumulate) \
+ V(Sdot, sdot, FourRegDestructiveHelper) \
+ V(Udot, udot, FourRegDestructiveHelper) \
+ V(Adclb, adclb, FourRegDestructiveHelper) \
+ V(Adclt, adclt, FourRegDestructiveHelper) \
+ V(Sbclb, sbclb, FourRegDestructiveHelper) \
+ V(Sbclt, sbclt, FourRegDestructiveHelper) \
+ V(Smlalb, smlalb, FourRegDestructiveHelper) \
+ V(Smlalt, smlalt, FourRegDestructiveHelper) \
+ V(Smlslb, smlslb, FourRegDestructiveHelper) \
+ V(Smlslt, smlslt, FourRegDestructiveHelper) \
+ V(Umlalb, umlalb, FourRegDestructiveHelper) \
+ V(Umlalt, umlalt, FourRegDestructiveHelper) \
+ V(Umlslb, umlslb, FourRegDestructiveHelper) \
+ V(Umlslt, umlslt, FourRegDestructiveHelper) \
+ V(Bcax, bcax, FourRegDestructiveHelper) \
+ V(Bsl, bsl, FourRegDestructiveHelper) \
+ V(Bsl1n, bsl1n, FourRegDestructiveHelper) \
+ V(Bsl2n, bsl2n, FourRegDestructiveHelper) \
+ V(Eor3, eor3, FourRegDestructiveHelper) \
+ V(Nbsl, nbsl, FourRegDestructiveHelper) \
+ V(Fmlalb, fmlalb, FourRegDestructiveHelper) \
+ V(Fmlalt, fmlalt, FourRegDestructiveHelper) \
+ V(Fmlslb, fmlslb, FourRegDestructiveHelper) \
+ V(Fmlslt, fmlslt, FourRegDestructiveHelper) \
+ V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper) \
+ V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \
+ V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper) \
+ V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper) \
+ V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \
+ V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper) \
+ V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper) \
+ V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper) \
+ V(Fmmla, fmmla, FourRegDestructiveHelper) \
+ V(Smmla, smmla, FourRegDestructiveHelper) \
+ V(Ummla, ummla, FourRegDestructiveHelper) \
+ V(Usmmla, usmmla, FourRegDestructiveHelper) \
+ V(Usdot, usdot, FourRegDestructiveHelper)
+
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
+ void MacroAssembler::MASMFN(const ZRegister& zd, \
+ const ZRegister& za, \
+ const ZRegister& zn, \
+ const ZRegister& zm) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ HELPER(&Assembler::ASMFN, zd, za, zn, zm); \
+ }
+VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
+
+#define VIXL_SVE_4REG_1IMM_LIST(V) \
+ V(Fmla, fmla, FourRegOneImmDestructiveHelper) \
+ V(Fmls, fmls, FourRegOneImmDestructiveHelper) \
+ V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper) \
+ V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper) \
+ V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper) \
+ V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper) \
+ V(Mla, mla, FourRegOneImmDestructiveHelper) \
+ V(Mls, mls, FourRegOneImmDestructiveHelper) \
+ V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \
+ V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \
+ V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \
+ V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \
+ V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
+ V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
+ V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
+ V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
+ V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \
+ V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \
+ V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \
+ V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \
+ V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \
+ V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)
+
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
+ void MacroAssembler::MASMFN(const ZRegister& zd, \
+ const ZRegister& za, \
+ const ZRegister& zn, \
+ const ZRegister& zm, \
+ int imm) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm); \
+ }
+VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
+
void MacroAssembler::Sdot(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
- const ZRegister& zm) {
+ const ZRegister& zm,
+ int index) {
VIXL_ASSERT(allow_macro_instructions_);
- SVESdotUdotHelper(&Assembler::sdot, zd, za, zn, zm);
+ SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
}
-void MacroAssembler::Sdot(const ZRegister& zd,
+void MacroAssembler::Udot(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm,
int index) {
VIXL_ASSERT(allow_macro_instructions_);
- SVESdotUdotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
+ SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
}
-void MacroAssembler::Udot(const ZRegister& zd,
+void MacroAssembler::Sudot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index);
+}
+
+void MacroAssembler::Usdot(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int index) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index);
+}
+
+void MacroAssembler::Cdot(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
- const ZRegister& zm) {
- VIXL_ASSERT(allow_macro_instructions_);
- SVESdotUdotHelper(&Assembler::udot, zd, za, zn, zm);
+ const ZRegister& zm,
+ int index,
+ int rot) {
+ // This doesn't handle zm when it's out of the range that can be encoded in
+ // instruction. The range depends on element size: z0-z7 for B, z0-15 for H.
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ {
+ MovprfxHelperScope guard(this, ztmp, za);
+ cdot(ztmp, zn, zm, index, rot);
+ }
+ Mov(zd, ztmp);
+ } else {
+ MovprfxHelperScope guard(this, zd, za);
+ cdot(zd, zn, zm, index, rot);
+ }
}
-void MacroAssembler::Udot(const ZRegister& zd,
+void MacroAssembler::Cdot(const ZRegister& zd,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm,
- int index) {
- VIXL_ASSERT(allow_macro_instructions_);
- SVESdotUdotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
+ int rot) {
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
+ UseScratchRegisterScope temps(this);
+ VIXL_ASSERT(AreSameLaneSize(zn, zm));
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
+ Mov(ztmp, zd.Aliases(zn) ? zn : zm);
+ MovprfxHelperScope guard(this, zd, za);
+ cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot);
+ } else {
+ MovprfxHelperScope guard(this, zd, za);
+ cdot(zd, zn, zm, rot);
+ }
}
void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
@@ -1792,35 +2014,6 @@ void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
}
}
-void MacroAssembler::FPMulAddIndexHelper(SVEMulAddIndexFn fn,
- const ZRegister& zd,
- const ZRegister& za,
- const ZRegister& zn,
- const ZRegister& zm,
- int index) {
- if (zd.Aliases(za)) {
- // zda = zda + (zn * zm[i])
- SingleEmissionCheckScope guard(this);
- (this->*fn)(zd, zn, zm, index);
-
- } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
- // zdn = za + (zdn * zm[i])
- // zdm = za + (zn * zdm[i])
- // zdnm = za + (zdnm * zdnm[i])
- UseScratchRegisterScope temps(this);
- ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
- {
- MovprfxHelperScope guard(this, scratch, za);
- (this->*fn)(scratch, zn, zm, index);
- }
- Mov(zd, scratch);
- } else {
- // zd = za + (zn * zm[i])
- MovprfxHelperScope guard(this, zd, za);
- (this->*fn)(zd, zn, zm, index);
- }
-}
-
void MacroAssembler::Fmla(const ZRegister& zd,
const PRegisterM& pg,
const ZRegister& za,
@@ -1838,15 +2031,6 @@ void MacroAssembler::Fmla(const ZRegister& zd,
nan_option);
}
-void MacroAssembler::Fmla(const ZRegister& zd,
- const ZRegister& za,
- const ZRegister& zn,
- const ZRegister& zm,
- int index) {
- VIXL_ASSERT(allow_macro_instructions_);
- FPMulAddIndexHelper(&Assembler::fmla, zd, za, zn, zm, index);
-}
-
void MacroAssembler::Fmls(const ZRegister& zd,
const PRegisterM& pg,
const ZRegister& za,
@@ -1864,15 +2048,6 @@ void MacroAssembler::Fmls(const ZRegister& zd,
nan_option);
}
-void MacroAssembler::Fmls(const ZRegister& zd,
- const ZRegister& za,
- const ZRegister& zn,
- const ZRegister& zm,
- int index) {
- VIXL_ASSERT(allow_macro_instructions_);
- FPMulAddIndexHelper(&Assembler::fmls, zd, za, zn, zm, index);
-}
-
void MacroAssembler::Fnmla(const ZRegister& zd,
const PRegisterM& pg,
const ZRegister& za,
@@ -1944,25 +2119,24 @@ void MacroAssembler::Fcadd(const ZRegister& zd,
}
}
-void MacroAssembler::Ext(const ZRegister& zd,
- const ZRegister& zn,
- const ZRegister& zm,
- unsigned offset) {
+void MacroAssembler::Fcmla(const ZRegister& zd,
+ const PRegisterM& pg,
+ const ZRegister& za,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
VIXL_ASSERT(allow_macro_instructions_);
- if (zd.Aliases(zm) && !zd.Aliases(zn)) {
- // zd = ext(zn, zd, offset)
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
UseScratchRegisterScope temps(this);
- ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
{
- MovprfxHelperScope guard(this, scratch, zn);
- ext(scratch, scratch, zm, offset);
+ MovprfxHelperScope guard(this, ztmp, za);
+ fcmla(ztmp, pg, zn, zm, rot);
}
- Mov(zd, scratch);
+ Mov(zd, pg, ztmp);
} else {
- // zd = ext(zn, zm, offset)
- // zd = ext(zd, zd, offset)
- MovprfxHelperScope guard(this, zd, zn);
- ext(zd, zd, zm, offset);
+ MovprfxHelperScope guard(this, zd, pg, za);
+ fcmla(zd, pg, zn, zm, rot);
}
}
@@ -1971,7 +2145,10 @@ void MacroAssembler::Splice(const ZRegister& zd,
const ZRegister& zn,
const ZRegister& zm) {
VIXL_ASSERT(allow_macro_instructions_);
- if (zd.Aliases(zm) && !zd.Aliases(zn)) {
+ if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) {
+ SingleEmissionCheckScope guard(this);
+ splice(zd, pg, zn, zm);
+ } else if (zd.Aliases(zm) && !zd.Aliases(zn)) {
UseScratchRegisterScope temps(this);
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
{
@@ -2023,5 +2200,87 @@ void MacroAssembler::Clastb(const ZRegister& zd,
}
}
+void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn,
+ const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (!zd.Aliases(za) && zd.Aliases(zn)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
+ Mov(ztmp, zn);
+ {
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, ztmp, shift);
+ }
+ } else {
+ MovprfxHelperScope guard(this, zd, za);
+ (this->*fn)(zd, zn, shift);
+ }
+}
+
+void MacroAssembler::Srsra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift) {
+ ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift);
+}
+
+void MacroAssembler::Ssra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift) {
+ ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift);
+}
+
+void MacroAssembler::Ursra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift) {
+ ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift);
+}
+
+void MacroAssembler::Usra(const ZRegister& zd,
+ const ZRegister& za,
+ const ZRegister& zn,
+ int shift) {
+ ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift);
+}
+
+void MacroAssembler::ComplexAddition(ZZZImmFn fn,
+ const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (!zd.Aliases(zn) && zd.Aliases(zm)) {
+ UseScratchRegisterScope temps(this);
+ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm);
+ Mov(ztmp, zm);
+ {
+ MovprfxHelperScope guard(this, zd, zn);
+ (this->*fn)(zd, zd, ztmp, rot);
+ }
+ } else {
+ MovprfxHelperScope guard(this, zd, zn);
+ (this->*fn)(zd, zd, zm, rot);
+ }
+}
+
+void MacroAssembler::Cadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ ComplexAddition(&Assembler::cadd, zd, zn, zm, rot);
+}
+
+void MacroAssembler::Sqcadd(const ZRegister& zd,
+ const ZRegister& zn,
+ const ZRegister& zm,
+ int rot) {
+ ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot);
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/operands-aarch64.cc b/src/aarch64/operands-aarch64.cc
index 008179e4..8db129c9 100644
--- a/src/aarch64/operands-aarch64.cc
+++ b/src/aarch64/operands-aarch64.cc
@@ -360,12 +360,16 @@ bool MemOperand::IsRegisterOffset() const {
return (addrmode_ == Offset) && !regoffset_.Is(NoReg);
}
-
bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; }
-
-
bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; }
+bool MemOperand::IsImmediatePreIndex() const {
+ return IsPreIndex() && regoffset_.Is(NoReg);
+}
+
+bool MemOperand::IsImmediatePostIndex() const {
+ return IsPostIndex() && regoffset_.Is(NoReg);
+}
void MemOperand::AddOffset(int64_t offset) {
VIXL_ASSERT(IsImmediateOffset());
@@ -382,6 +386,7 @@ bool SVEMemOperand::IsValid() const {
if (IsScalarPlusScalar()) count++;
if (IsScalarPlusVector()) count++;
if (IsVectorPlusImmediate()) count++;
+ if (IsVectorPlusScalar()) count++;
if (IsVectorPlusVector()) count++;
VIXL_ASSERT(count <= 1);
}
@@ -406,7 +411,7 @@ bool SVEMemOperand::IsValid() const {
return IsScalarPlusImmediate() || IsScalarPlusScalar() ||
IsScalarPlusVector() || IsVectorPlusImmediate() ||
- IsVectorPlusVector();
+ IsVectorPlusScalar() || IsVectorPlusVector();
}
diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h
index ad03a9ee..08ee4a61 100644
--- a/src/aarch64/operands-aarch64.h
+++ b/src/aarch64/operands-aarch64.h
@@ -434,9 +434,14 @@ class MemOperand {
bool IsImmediateOffset() const;
// True for register-offset (but not indexed) MemOperands.
bool IsRegisterOffset() const;
-
+ // True for immediate or register pre-indexed MemOperands.
bool IsPreIndex() const;
+ // True for immediate or register post-indexed MemOperands.
bool IsPostIndex() const;
+ // True for immediate pre-indexed MemOperands, [reg, #imm]!
+ bool IsImmediatePreIndex() const;
+ // True for immediate post-indexed MemOperands, [reg], #imm
+ bool IsImmediatePostIndex() const;
void AddOffset(int64_t offset);
@@ -545,6 +550,17 @@ class SVEMemOperand {
VIXL_ASSERT(IsValid());
}
+ // "vector-plus-scalar", like [z0.d, x0]
+ SVEMemOperand(ZRegister base, Register offset)
+ : base_(base),
+ regoffset_(offset),
+ offset_(0),
+ mod_(NO_SVE_OFFSET_MODIFIER),
+ shift_amount_(0) {
+ VIXL_ASSERT(IsValid());
+ VIXL_ASSERT(IsVectorPlusScalar());
+ }
+
// "vector-plus-vector", like [z0.d, z1.d, UXTW]
template <typename M = SVEOffsetModifier>
SVEMemOperand(ZRegister base,
@@ -603,6 +619,11 @@ class SVEMemOperand {
regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER);
}
+ bool IsVectorPlusScalar() const {
+ return base_.IsZRegister() && regoffset_.IsX() &&
+ (base_.IsLaneSizeS() || base_.IsLaneSizeD());
+ }
+
bool IsVectorPlusVector() const {
return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) &&
AreSameFormat(base_, regoffset_) &&
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 04f1165d..d183dc35 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -67,9 +67,352 @@ SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
}
+const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
+ static const FormToVisitorFnMap form_to_visitor = {
+ DEFAULT_FORM_TO_VISITOR_MAP(Simulator),
+ SIM_AUD_VISITOR_MAP(Simulator),
+ {"smlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"smlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"smull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"sqdmlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"sqdmlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"sqdmull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"umlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"umlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"umull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
+ {"fcmla_asimdelem_c_h"_h, &Simulator::SimulateNEONComplexMulByElement},
+ {"fcmla_asimdelem_c_s"_h, &Simulator::SimulateNEONComplexMulByElement},
+ {"fmlal2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
+ {"fmlal_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
+ {"fmlsl2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
+ {"fmlsl_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
+ {"fmla_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"fmls_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"fmulx_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"fmul_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"fmla_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"fmls_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"fmulx_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"fmul_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
+ {"sdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
+ {"udot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
+ {"adclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
+ {"adclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
+ {"addhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"addhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"addp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
+ {"bcax_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
+ {"bdep_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"bext_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"bgrp_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"bsl1n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
+ {"bsl2n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
+ {"bsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
+ {"cadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
+ {"cdot_z_zzz"_h, &Simulator::SimulateSVEComplexDotProduct},
+ {"cdot_z_zzzi_d"_h, &Simulator::SimulateSVEComplexDotProduct},
+ {"cdot_z_zzzi_s"_h, &Simulator::SimulateSVEComplexDotProduct},
+ {"cmla_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
+ {"cmla_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
+ {"cmla_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
+ {"eor3_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
+ {"eorbt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"eortb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"ext_z_zi_con"_h, &Simulator::Simulate_ZdB_Zn1B_Zn2B_imm},
+ {"faddp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
+ {"fcvtlt_z_p_z_h2s"_h, &Simulator::SimulateSVEFPConvertLong},
+ {"fcvtlt_z_p_z_s2d"_h, &Simulator::SimulateSVEFPConvertLong},
+ {"fcvtnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
+ {"fcvtnt_z_p_z_s2h"_h, &Simulator::Simulate_ZdH_PgM_ZnS},
+ {"fcvtx_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
+ {"fcvtxnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
+ {"flogb_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
+ {"fmaxnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
+ {"fmaxp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
+ {"fminnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
+ {"fminp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
+ {"fmlalb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
+ {"fmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"fmlalt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
+ {"fmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"fmlslb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
+ {"fmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"fmlslt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
+ {"fmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"histcnt_z_p_zz"_h, &Simulator::Simulate_ZdT_PgZ_ZnT_ZmT},
+ {"histseg_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
+ {"ldnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1sb_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1sh_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
+ {"ldnt1sw_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
+ {"ldnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
+ {"match_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
+ {"mla_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
+ {"mla_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
+ {"mla_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
+ {"mls_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
+ {"mls_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
+ {"mls_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
+ {"mul_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"mul_z_zzi_d"_h, &Simulator::SimulateSVEMulIndex},
+ {"mul_z_zzi_h"_h, &Simulator::SimulateSVEMulIndex},
+ {"mul_z_zzi_s"_h, &Simulator::SimulateSVEMulIndex},
+ {"nbsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
+ {"nmatch_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
+ {"pmul_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
+ {"pmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"pmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"raddhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"raddhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"rshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"rshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"rsubhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"rsubhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"saba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
+ {"sabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"sabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"sabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"sabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"sadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
+ {"saddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"saddlbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"saddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"saddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"saddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"sbclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
+ {"sbclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
+ {"shadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"shrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"shrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"shsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"shsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"sli_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
+ {"smaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
+ {"sminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
+ {"smlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"smlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"smlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"smlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"smlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"smullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"smullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"smullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"smullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"splice_z_p_zz_con"_h, &Simulator::VisitSVEVectorSplice},
+ {"sqabs_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
+ {"sqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"sqcadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
+ {"sqdmlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"sqdmlalb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"sqdmlalbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"sqdmlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"sqdmlalt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"sqdmlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"sqdmlslb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"sqdmlslbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"sqdmlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"sqdmlslt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
+ {"sqdmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
+ {"sqdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"sqdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
+ {"sqdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
+ {"sqdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
+ {"sqdmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"sqdmullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"sqdmullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"sqdmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"sqdmullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"sqdmullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"sqneg_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
+ {"sqrdcmlah_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
+ {"sqrdcmlah_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
+ {"sqrdcmlah_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
+ {"sqrdmlah_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmlah_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmlah_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmlah_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmlsh_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmlsh_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmlsh_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmlsh_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
+ {"sqrdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"sqrdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
+ {"sqrdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
+ {"sqrdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
+ {"sqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"sqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"sqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqrshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqrshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
+ {"sqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"sqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"sqshlu_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
+ {"sqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"sqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"sqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"sqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
+ {"sqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
+ {"sqxtunb_z_zz"_h, &Simulator::SimulateSVENarrow},
+ {"sqxtunt_z_zz"_h, &Simulator::SimulateSVENarrow},
+ {"srhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"sri_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
+ {"srshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"srshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"srshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
+ {"srsra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
+ {"sshllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
+ {"sshllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
+ {"ssra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
+ {"ssublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"ssublbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"ssublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"ssubltb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"ssubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"ssubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"stnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
+ {"stnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
+ {"stnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
+ {"stnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
+ {"stnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
+ {"stnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
+ {"stnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
+ {"subhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"subhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
+ {"suqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"tbl_z_zz_2"_h, &Simulator::VisitSVETableLookup},
+ {"tbx_z_zz"_h, &Simulator::VisitSVETableLookup},
+ {"uaba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
+ {"uabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"uabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"uabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"uabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"uadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
+ {"uaddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"uaddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"uaddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"uaddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"uhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"uhsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"uhsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"umaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
+ {"uminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
+ {"umlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"umlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"umlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"umlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
+ {"umlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
+ {"umullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"umullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
+ {"umullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"umullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
+ {"uqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"uqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"uqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"uqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"uqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"uqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
+ {"uqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"uqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"uqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"uqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
+ {"uqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"uqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"uqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
+ {"uqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
+ {"urecpe_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
+ {"urhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
+ {"urshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"urshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
+ {"urshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
+ {"ursqrte_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
+ {"ursra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
+ {"ushllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
+ {"ushllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
+ {"usqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
+ {"usra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
+ {"usublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"usublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
+ {"usubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"usubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
+ {"whilege_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilegt_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilehi_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilehs_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
+ {"whilerw_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
+ {"whilewr_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
+ {"xar_z_zzi"_h, &Simulator::SimulateSVEExclusiveOrRotate},
+ {"smmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
+ {"ummla_z_zzz"_h, &Simulator::SimulateMatrixMul},
+ {"usmmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
+ {"smmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
+ {"ummla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
+ {"usmmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
+ {"fmmla_z_zzz_s"_h, &Simulator::SimulateSVEFPMatrixMul},
+ {"fmmla_z_zzz_d"_h, &Simulator::SimulateSVEFPMatrixMul},
+ {"ld1row_z_p_bi_u32"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1row_z_p_br_contiguous"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"ld1rod_z_p_bi_u64"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1rod_z_p_br_contiguous"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"ld1rob_z_p_bi_u8"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1rob_z_p_br_contiguous"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"ld1roh_z_p_bi_u16"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
+ {"ld1roh_z_p_br_contiguous"_h,
+ &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
+ {"usdot_z_zzz_s"_h, &Simulator::VisitSVEIntMulAddUnpredicated},
+ {"sudot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
+ {"usdot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
+ {"usdot_asimdsame2_d"_h, &Simulator::VisitNEON3SameExtra},
+ {"sudot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
+ {"usdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
+ };
+ return &form_to_visitor;
+}
+
Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack)
: memory_(std::move(stack)),
- movprfx_(NULL),
+ last_instr_(NULL),
cpu_features_auditor_(decoder, CPUFeatures::All()) {
// Ensure that shift operations act as the simulator expects.
VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
@@ -440,13 +783,29 @@ void Simulator::SetTraceParameters(int parameters) {
}
}
-
// Helpers ---------------------------------------------------------------------
uint64_t Simulator::AddWithCarry(unsigned reg_size,
bool set_flags,
uint64_t left,
uint64_t right,
int carry_in) {
+ std::pair<uint64_t, uint8_t> result_and_flags =
+ AddWithCarry(reg_size, left, right, carry_in);
+ if (set_flags) {
+ uint8_t flags = result_and_flags.second;
+ ReadNzcv().SetN((flags >> 3) & 1);
+ ReadNzcv().SetZ((flags >> 2) & 1);
+ ReadNzcv().SetC((flags >> 1) & 1);
+ ReadNzcv().SetV((flags >> 0) & 1);
+ LogSystemRegister(NZCV);
+ }
+ return result_and_flags.first;
+}
+
+std::pair<uint64_t, uint8_t> Simulator::AddWithCarry(unsigned reg_size,
+ uint64_t left,
+ uint64_t right,
+ int carry_in) {
VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
@@ -458,28 +817,74 @@ uint64_t Simulator::AddWithCarry(unsigned reg_size,
right &= reg_mask;
uint64_t result = (left + right + carry_in) & reg_mask;
- if (set_flags) {
- ReadNzcv().SetN(CalcNFlag(result, reg_size));
- ReadNzcv().SetZ(CalcZFlag(result));
+ // NZCV bits, ordered N in bit 3 to V in bit 0.
+ uint8_t nzcv = CalcNFlag(result, reg_size) ? 8 : 0;
+ nzcv |= CalcZFlag(result) ? 4 : 0;
- // Compute the C flag by comparing the result to the max unsigned integer.
- uint64_t max_uint_2op = max_uint - carry_in;
- bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
- ReadNzcv().SetC(C ? 1 : 0);
+ // Compute the C flag by comparing the result to the max unsigned integer.
+ uint64_t max_uint_2op = max_uint - carry_in;
+ bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
+ nzcv |= C ? 2 : 0;
- // Overflow iff the sign bit is the same for the two inputs and different
- // for the result.
- uint64_t left_sign = left & sign_mask;
- uint64_t right_sign = right & sign_mask;
- uint64_t result_sign = result & sign_mask;
- bool V = (left_sign == right_sign) && (left_sign != result_sign);
- ReadNzcv().SetV(V ? 1 : 0);
+ // Overflow iff the sign bit is the same for the two inputs and different
+ // for the result.
+ uint64_t left_sign = left & sign_mask;
+ uint64_t right_sign = right & sign_mask;
+ uint64_t result_sign = result & sign_mask;
+ bool V = (left_sign == right_sign) && (left_sign != result_sign);
+ nzcv |= V ? 1 : 0;
- LogSystemRegister(NZCV);
- }
- return result;
+ return std::make_pair(result, nzcv);
+}
+
+using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
+
+vixl_uint128_t Simulator::Add128(vixl_uint128_t x, vixl_uint128_t y) {
+ std::pair<uint64_t, uint8_t> sum_lo =
+ AddWithCarry(kXRegSize, x.second, y.second, 0);
+ int carry_in = (sum_lo.second & 0x2) >> 1; // C flag in NZCV result.
+ std::pair<uint64_t, uint8_t> sum_hi =
+ AddWithCarry(kXRegSize, x.first, y.first, carry_in);
+ return std::make_pair(sum_hi.first, sum_lo.first);
+}
+
+vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) {
+ // Negate the integer value. Throw an assertion when the input is INT128_MIN.
+ VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0));
+ x.first = ~x.first;
+ x.second = ~x.second;
+ return Add128(x, {0, 1});
}
+vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) {
+ bool neg_result = false;
+ if ((x >> 63) == 1) {
+ x = -x;
+ neg_result = !neg_result;
+ }
+ if ((y >> 63) == 1) {
+ y = -y;
+ neg_result = !neg_result;
+ }
+
+ uint64_t x_lo = x & 0xffffffff;
+ uint64_t x_hi = x >> 32;
+ uint64_t y_lo = y & 0xffffffff;
+ uint64_t y_hi = y >> 32;
+
+ uint64_t t1 = x_lo * y_hi;
+ uint64_t t2 = x_hi * y_lo;
+ vixl_uint128_t a = std::make_pair(0, x_lo * y_lo);
+ vixl_uint128_t b = std::make_pair(t1 >> 32, t1 << 32);
+ vixl_uint128_t c = std::make_pair(t2 >> 32, t2 << 32);
+ vixl_uint128_t d = std::make_pair(x_hi * y_hi, 0);
+
+ vixl_uint128_t result = Add128(a, b);
+ result = Add128(result, c);
+ result = Add128(result, d);
+ return neg_result ? std::make_pair(-result.first - 1, -result.second)
+ : result;
+}
int64_t Simulator::ShiftOperand(unsigned reg_size,
uint64_t uvalue,
@@ -1569,6 +1974,1564 @@ void Simulator::PrintTakenBranch(const Instruction* target) {
// Visitors---------------------------------------------------------------------
+// Top-level instruction visitor. Looks up the simulation handler registered
+// for the decoded "form" string (keyed by its hash, cached in form_hash_ for
+// use by the handlers) and invokes it; instructions without a registered
+// handler fall back to VisitUnimplemented.
+void Simulator::Visit(Metadata* metadata, const Instruction* instr) {
+  VIXL_ASSERT(metadata->count("form") > 0);
+  std::string form = (*metadata)["form"];
+  form_hash_ = Hash(form.c_str());
+  const FormToVisitorFnMap* fv = Simulator::GetFormToVisitorFnMap();
+  FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
+  if (it == fv->end()) {
+    VisitUnimplemented(instr);
+  } else {
+    (it->second)(this, instr);
+  }
+}
+
+// SVE2 MATCH/NMATCH: compute the (possibly negated) character-match predicate
+// of zn against zm into pd, zero the lanes inactive in pg, and update the
+// condition flags from the result via PredTest.
+void Simulator::Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimPRegister& pd = ReadPRegister(instr->GetPd());
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  switch (form_hash_) {
+    case "match_p_p_zz"_h:
+      match(vform, pd, zn, zm, /* negate_match = */ false);
+      break;
+    case "nmatch_p_p_zz"_h:
+      match(vform, pd, zn, zm, /* negate_match = */ true);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+  // Predication is zeroing: clear lanes not governed by pg, then set flags.
+  mov_zeroing(pd, pg, pd);
+  PredTest(vform, pg, pd);
+}
+
+// SVE2 WHILERW/WHILEWR: build a predicate describing how many leading lanes
+// of two memory regions (base addresses in Xn and Xm) can be processed
+// without a read/write conflict. The address difference is converted to a
+// lane count, and lanes below that count (or all lanes when there is no
+// conflict) are made active. Flags are set against an all-true governing
+// predicate.
+void Simulator::Simulate_PdT_Xn_Xm(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimPRegister& pd = ReadPRegister(instr->GetPd());
+  uint64_t src1 = ReadXRegister(instr->GetRn());
+  uint64_t src2 = ReadXRegister(instr->GetRm());
+
+  // Absolute address difference, expressed in whole lanes.
+  uint64_t absdiff = (src1 > src2) ? (src1 - src2) : (src2 - src1);
+  absdiff >>= LaneSizeInBytesLog2FromFormat(vform);
+
+  bool no_conflict = false;
+  switch (form_hash_) {
+    case "whilerw_p_rr"_h:
+      no_conflict = (absdiff == 0);
+      break;
+    case "whilewr_p_rr"_h:
+      // Writing before reading is also conflict-free.
+      no_conflict = (absdiff == 0) || (src2 <= src1);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+
+  LogicPRegister dst(pd);
+  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+    dst.SetActive(vform,
+                  i,
+                  no_conflict || (static_cast<uint64_t>(i) < absdiff));
+  }
+
+  PredTest(vform, GetPTrue(), pd);
+}
+
+// SVE2 EXT (constructive form): extract a vector from the byte-wise
+// concatenation of the register pair {zn, zn+1}, starting at an immediate
+// byte offset.
+void Simulator::Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr) {
+  VIXL_ASSERT(form_hash_ == "ext_z_zi_con"_h);
+
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
+
+  int index = instr->GetSVEExtractImmediate();
+  int vl = GetVectorLengthInBytes();
+  // An immediate at or beyond the vector length selects offset 0.
+  // NOTE(review): confirm this clamp matches the architected out-of-range
+  // behaviour for EXT rather than an implementation convenience.
+  index = (index >= vl) ? 0 : index;
+
+  ext(kFormatVnB, zd, zn, zn2, index);
+}
+
+// Byte-only SVE2 operations: HISTSEG (per-segment histogram, unpredicated,
+// driven with an all-true predicate) and PMUL (polynomial multiply).
+void Simulator::Simulate_ZdB_ZnB_ZmB(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  switch (form_hash_) {
+    case "histseg_z_zz"_h:
+      // Only the B-sized encoding is defined for HISTSEG.
+      if (instr->GetSVEVectorFormat() == kFormatVnB) {
+        histogram(kFormatVnB,
+                  zd,
+                  GetPTrue(),
+                  zn,
+                  zm,
+                  /* do_segmented = */ true);
+      } else {
+        VIXL_UNIMPLEMENTED();
+      }
+      break;
+    case "pmul_z_zz"_h:
+      pmul(kFormatVnB, zd, zn, zm);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 MUL (indexed): multiply zn by a single zm element, broadcast to every
+// lane of each 128-bit segment.
+void Simulator::SimulateSVEMulIndex(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  // The encoding for B and H-sized lanes are redefined to encode the most
+  // significant bit of index for H-sized lanes. B-sized lanes are not
+  // supported.
+  if (vform == kFormatVnB) vform = kFormatVnH;
+
+  VIXL_ASSERT((form_hash_ == "mul_z_zzi_d"_h) ||
+              (form_hash_ == "mul_z_zzi_h"_h) ||
+              (form_hash_ == "mul_z_zzi_s"_h));
+
+  // Broadcast the indexed element within each segment, then reuse the
+  // vector-form multiply.
+  SimVRegister temp;
+  dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
+  mul(vform, zd, zn, temp);
+}
+
+// SVE2 MLA/MLS (indexed): multiply-accumulate (or -subtract) zn by a
+// broadcast zm element into zda. Bit 10 of the encoding distinguishes MLA
+// (0) from MLS (1).
+void Simulator::SimulateSVEMlaMlsIndex(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  // The encoding for B and H-sized lanes are redefined to encode the most
+  // significant bit of index for H-sized lanes. B-sized lanes are not
+  // supported.
+  if (vform == kFormatVnB) vform = kFormatVnH;
+
+  VIXL_ASSERT(
+      (form_hash_ == "mla_z_zzzi_d"_h) || (form_hash_ == "mla_z_zzzi_h"_h) ||
+      (form_hash_ == "mla_z_zzzi_s"_h) || (form_hash_ == "mls_z_zzzi_d"_h) ||
+      (form_hash_ == "mls_z_zzzi_h"_h) || (form_hash_ == "mls_z_zzzi_s"_h));
+
+  SimVRegister temp;
+  dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
+  if (instr->ExtractBit(10) == 0) {
+    mla(vform, zda, zda, zn, temp);
+  } else {
+    mls(vform, zda, zda, zn, temp);
+  }
+}
+
+// SVE2 SQDMULH/SQRDMULH (indexed): saturating doubling multiply returning
+// the high half, with zm's indexed element broadcast per segment.
+void Simulator::SimulateSVESaturatingMulHighIndex(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  // The encoding for B and H-sized lanes are redefined to encode the most
+  // significant bit of index for H-sized lanes. B-sized lanes are not
+  // supported.
+  if (vform == kFormatVnB) {
+    vform = kFormatVnH;
+  }
+
+  SimVRegister temp;
+  dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
+  switch (form_hash_) {
+    case "sqdmulh_z_zzi_h"_h:
+    case "sqdmulh_z_zzi_s"_h:
+    case "sqdmulh_z_zzi_d"_h:
+      sqdmulh(vform, zd, zn, temp);
+      break;
+    case "sqrdmulh_z_zzi_h"_h:
+    case "sqrdmulh_z_zzi_s"_h:
+    case "sqrdmulh_z_zzi_d"_h:
+      sqrdmulh(vform, zd, zn, temp);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 indexed widening multiplies and multiply-accumulates (SMULL[B|T],
+// UMULL[B|T], SQDMULL[B|T], SMLAL/SMLSL/UMLAL/UMLSL [B|T], indexed forms).
+// "B" variants operate on even (bottom) source elements, "T" variants on odd
+// (top) elements; results are full destination-width lanes.
+void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  SimVRegister temp, zm_idx, zn_b, zn_t;
+  // Instead of calling the indexed form of the instruction logic, we call the
+  // vector form, which can reuse existing function logic without modification.
+  // Select the specified elements based on the index input and then pack them
+  // to the corresponding position.
+  VectorFormat vform_half = VectorFormatHalfWidth(vform);
+  dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex());
+  pack_even_elements(vform_half, zm_idx, temp);
+
+  pack_even_elements(vform_half, zn_b, zn);
+  pack_odd_elements(vform_half, zn_t, zn);
+
+  switch (form_hash_) {
+    case "smullb_z_zzi_s"_h:
+    case "smullb_z_zzi_d"_h:
+      smull(vform, zd, zn_b, zm_idx);
+      break;
+    case "smullt_z_zzi_s"_h:
+    case "smullt_z_zzi_d"_h:
+      smull(vform, zd, zn_t, zm_idx);
+      break;
+    case "sqdmullb_z_zzi_d"_h:
+      sqdmull(vform, zd, zn_b, zm_idx);
+      break;
+    case "sqdmullt_z_zzi_d"_h:
+      sqdmull(vform, zd, zn_t, zm_idx);
+      break;
+    case "umullb_z_zzi_s"_h:
+    case "umullb_z_zzi_d"_h:
+      umull(vform, zd, zn_b, zm_idx);
+      break;
+    case "umullt_z_zzi_s"_h:
+    case "umullt_z_zzi_d"_h:
+      umull(vform, zd, zn_t, zm_idx);
+      break;
+    case "sqdmullb_z_zzi_s"_h:
+      sqdmull(vform, zd, zn_b, zm_idx);
+      break;
+    case "sqdmullt_z_zzi_s"_h:
+      sqdmull(vform, zd, zn_t, zm_idx);
+      break;
+    case "smlalb_z_zzzi_s"_h:
+    case "smlalb_z_zzzi_d"_h:
+      smlal(vform, zd, zn_b, zm_idx);
+      break;
+    case "smlalt_z_zzzi_s"_h:
+    case "smlalt_z_zzzi_d"_h:
+      smlal(vform, zd, zn_t, zm_idx);
+      break;
+    case "smlslb_z_zzzi_s"_h:
+    case "smlslb_z_zzzi_d"_h:
+      smlsl(vform, zd, zn_b, zm_idx);
+      break;
+    case "smlslt_z_zzzi_s"_h:
+    case "smlslt_z_zzzi_d"_h:
+      smlsl(vform, zd, zn_t, zm_idx);
+      break;
+    case "umlalb_z_zzzi_s"_h:
+    case "umlalb_z_zzzi_d"_h:
+      umlal(vform, zd, zn_b, zm_idx);
+      break;
+    case "umlalt_z_zzzi_s"_h:
+    case "umlalt_z_zzzi_d"_h:
+      umlal(vform, zd, zn_t, zm_idx);
+      break;
+    case "umlslb_z_zzzi_s"_h:
+    case "umlslb_z_zzzi_d"_h:
+      umlsl(vform, zd, zn_b, zm_idx);
+      break;
+    case "umlslt_z_zzzi_s"_h:
+    case "umlslt_z_zzzi_d"_h:
+      umlsl(vform, zd, zn_t, zm_idx);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 FCVTNT (S -> H): narrowing float convert writing the odd (top)
+// half-lanes of zd, preserving the existing even half-lanes; merging
+// predication at the source (S) lane granularity.
+void Simulator::Simulate_ZdH_PgM_ZnS(const Instruction* instr) {
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result, zd_b;
+
+  // Preserve zd's current even (bottom) H elements.
+  pack_even_elements(kFormatVnH, zd_b, zd);
+
+  switch (form_hash_) {
+    case "fcvtnt_z_p_z_s2h"_h:
+      fcvt(kFormatVnH, kFormatVnS, result, pg, zn);
+      pack_even_elements(kFormatVnH, result, result);
+      // Interleave: old bottoms in even lanes, new results in odd lanes.
+      zip1(kFormatVnH, result, zd_b, result);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+  mov_merging(kFormatVnS, zd, pg, result);
+}
+
+// SVE2 narrowing D -> S float converts: FCVTNT (write odd S lanes, keep even),
+// FCVTX (round-to-odd, zeroing the odd lanes) and FCVTXNT (round-to-odd,
+// write odd lanes, keep even). Merging predication at D lane granularity.
+void Simulator::Simulate_ZdS_PgM_ZnD(const Instruction* instr) {
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result, zero, zd_b;
+
+  zero.Clear();
+  // Preserve zd's current even (bottom) S elements for the "T" forms.
+  pack_even_elements(kFormatVnS, zd_b, zd);
+
+  switch (form_hash_) {
+    case "fcvtnt_z_p_z_d2s"_h:
+      fcvt(kFormatVnS, kFormatVnD, result, pg, zn);
+      pack_even_elements(kFormatVnS, result, result);
+      zip1(kFormatVnS, result, zd_b, result);
+      break;
+    case "fcvtx_z_p_z_d2s"_h:
+      // Round-to-odd narrow; results go to even lanes, odd lanes zeroed.
+      fcvtxn(kFormatVnS, result, zn);
+      zip1(kFormatVnS, result, result, zero);
+      break;
+    case "fcvtxnt_z_p_z_d2s"_h:
+      fcvtxn(kFormatVnS, result, zn);
+      zip1(kFormatVnS, result, zd_b, result);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+  mov_merging(kFormatVnD, zd, pg, result);
+}
+
+// SVE2 FCVTLT: widening float convert of the odd (top) source elements.
+// The source is byte-rotated so the odd elements land in even positions,
+// then the ordinary widening fcvt is applied under the merging predicate.
+void Simulator::SimulateSVEFPConvertLong(const Instruction* instr) {
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+
+  switch (form_hash_) {
+    case "fcvtlt_z_p_z_h2s"_h:
+      ext(kFormatVnB, result, zn, zn, kHRegSizeInBytes);
+      fcvt(kFormatVnS, kFormatVnH, zd, pg, result);
+      break;
+    case "fcvtlt_z_p_z_s2d"_h:
+      ext(kFormatVnB, result, zn, zn, kSRegSizeInBytes);
+      fcvt(kFormatVnD, kFormatVnS, zd, pg, result);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 URECPE/URSQRTE: unsigned reciprocal (square root) estimate, defined
+// only for S-sized lanes, with merging predication.
+void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+
+  if (vform != kFormatVnS) {
+    VIXL_UNIMPLEMENTED();
+  }
+
+  switch (form_hash_) {
+    case "urecpe_z_p_z"_h:
+      urecpe(vform, result, zn);
+      break;
+    case "ursqrte_z_p_z"_h:
+      ursqrte(vform, result, zn);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+  mov_merging(vform, zd, pg, result);
+}
+
+// SVE2 merging-predicated unary operations: FLOGB (float log base-2 exponent;
+// its lane size is encoded separately, from bit 17) and saturating
+// SQABS/SQNEG.
+void Simulator::Simulate_ZdT_PgM_ZnT(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+
+  switch (form_hash_) {
+    case "flogb_z_p_z"_h:
+      // FLOGB encodes its size field in a different position.
+      vform = instr->GetSVEVectorFormat(17);
+      flogb(vform, result, zn);
+      break;
+    case "sqabs_z_p_z"_h:
+      abs(vform, result, zn).SignedSaturate(vform);
+      break;
+    case "sqneg_z_p_z"_h:
+      neg(vform, result, zn).SignedSaturate(vform);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+  mov_merging(vform, zd, pg, result);
+}
+
+// SVE2 HISTCNT: per-lane histogram count of matching elements, defined for
+// S and D lanes only, with zeroing predication.
+void Simulator::Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+
+  VIXL_ASSERT(form_hash_ == "histcnt_z_p_zz"_h);
+  if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
+    histogram(vform, result, pg, zn, zm);
+    mov_zeroing(vform, zd, pg, result);
+  } else {
+    VIXL_UNIMPLEMENTED();
+  }
+}
+
+// Unpredicated SVE2 two-source operations: bit deposit/extract/group
+// (BDEP/BEXT/BGRP), interleaved EOR (EORBT/EORTB), and the multiply family
+// (MUL, SMULH, UMULH, SQDMULH, SQRDMULH).
+void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+  bool do_bext = false;
+
+  switch (form_hash_) {
+    case "bdep_z_zz"_h:
+      bdep(vform, zd, zn, zm);
+      break;
+    case "bext_z_zz"_h:
+      // BEXT shares the bgrp() helper, selected by the do_bext flag.
+      do_bext = true;
+      VIXL_FALLTHROUGH();
+    case "bgrp_z_zz"_h:
+      bgrp(vform, zd, zn, zm, do_bext);
+      break;
+    case "eorbt_z_zz"_h:
+      // EOR zn with zm's odd elements rotated into even positions, then
+      // write only the even (bottom) lanes of zd.
+      rotate_elements_right(vform, result, zm, 1);
+      SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
+      mov_alternating(vform, zd, result, 0);
+      break;
+    case "eortb_z_zz"_h:
+      // As above, but rotate the other way and write the odd (top) lanes.
+      rotate_elements_right(vform, result, zm, -1);
+      SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
+      mov_alternating(vform, zd, result, 1);
+      break;
+    case "mul_z_zz"_h:
+      mul(vform, zd, zn, zm);
+      break;
+    case "smulh_z_zz"_h:
+      smulh(vform, zd, zn, zm);
+      break;
+    case "sqdmulh_z_zz"_h:
+      sqdmulh(vform, zd, zn, zm);
+      break;
+    case "sqrdmulh_z_zz"_h:
+      sqrdmulh(vform, zd, zn, zm);
+      break;
+    case "umulh_z_zz"_h:
+      umulh(vform, zd, zn, zm);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 wide add/subtract: add or subtract zm's even ("B" forms) or odd
+// ("T" forms) half-width elements to/from the full-width elements of zn.
+void Simulator::Simulate_ZdT_ZnT_ZmTb(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  // Split zm into its even (bottom) and odd (top) half-width elements.
+  SimVRegister zm_b, zm_t;
+  VectorFormat vform_half = VectorFormatHalfWidth(vform);
+  pack_even_elements(vform_half, zm_b, zm);
+  pack_odd_elements(vform_half, zm_t, zm);
+
+  switch (form_hash_) {
+    case "saddwb_z_zz"_h:
+      saddw(vform, zd, zn, zm_b);
+      break;
+    case "saddwt_z_zz"_h:
+      saddw(vform, zd, zn, zm_t);
+      break;
+    case "ssubwb_z_zz"_h:
+      ssubw(vform, zd, zn, zm_b);
+      break;
+    case "ssubwt_z_zz"_h:
+      ssubw(vform, zd, zn, zm_t);
+      break;
+    case "uaddwb_z_zz"_h:
+      uaddw(vform, zd, zn, zm_b);
+      break;
+    case "uaddwt_z_zz"_h:
+      uaddw(vform, zd, zn, zm_t);
+      break;
+    case "usubwb_z_zz"_h:
+      usubw(vform, zd, zn, zm_b);
+      break;
+    case "usubwt_z_zz"_h:
+      usubw(vform, zd, zn, zm_t);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 SLI/SRI: shift left/right by immediate and insert into zd. The lane
+// size and shift amount are jointly decoded from the immediate field.
+void Simulator::Simulate_ZdT_ZnT_const(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  std::pair<int, int> shift_and_lane_size =
+      instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+  int lane_size = shift_and_lane_size.second;
+  VIXL_ASSERT((lane_size >= 0) &&
+              (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
+  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+  int shift_dist = shift_and_lane_size.first;
+
+  switch (form_hash_) {
+    case "sli_z_zzi"_h:
+      // Shift distance is computed differently for left shifts. Convert the
+      // result.
+      shift_dist = (8 << lane_size) - shift_dist;
+      sli(vform, zd, zn, shift_dist);
+      break;
+    case "sri_z_zzi"_h:
+      sri(vform, zd, zn, shift_dist);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 narrowing operations (saturating extracts SQXTN/SQXTUN/UQXTN and the
+// shift-right-narrow family SHRN/RSHRN/SQSHRN/...). "B" forms write the even
+// (bottom) destination elements and zero the odd ones; "T" forms write the
+// odd (top) elements and preserve the even ones.
+void Simulator::SimulateSVENarrow(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+
+  // vform here is the narrow (destination) lane format.
+  std::pair<int, int> shift_and_lane_size =
+      instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+  int lane_size = shift_and_lane_size.second;
+  VIXL_ASSERT((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
+              (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)));
+  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+  int right_shift_dist = shift_and_lane_size.first;
+  bool top = false;
+
+  switch (form_hash_) {
+    case "sqxtnt_z_zz"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "sqxtnb_z_zz"_h:
+      sqxtn(vform, result, zn);
+      break;
+    case "sqxtunt_z_zz"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "sqxtunb_z_zz"_h:
+      sqxtun(vform, result, zn);
+      break;
+    case "uqxtnt_z_zz"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "uqxtnb_z_zz"_h:
+      uqxtn(vform, result, zn);
+      break;
+    case "rshrnt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "rshrnb_z_zi"_h:
+      rshrn(vform, result, zn, right_shift_dist);
+      break;
+    case "shrnt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "shrnb_z_zi"_h:
+      shrn(vform, result, zn, right_shift_dist);
+      break;
+    case "sqrshrnt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "sqrshrnb_z_zi"_h:
+      sqrshrn(vform, result, zn, right_shift_dist);
+      break;
+    case "sqrshrunt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "sqrshrunb_z_zi"_h:
+      sqrshrun(vform, result, zn, right_shift_dist);
+      break;
+    case "sqshrnt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "sqshrnb_z_zi"_h:
+      sqshrn(vform, result, zn, right_shift_dist);
+      break;
+    case "sqshrunt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "sqshrunb_z_zi"_h:
+      sqshrun(vform, result, zn, right_shift_dist);
+      break;
+    case "uqrshrnt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "uqrshrnb_z_zi"_h:
+      uqrshrn(vform, result, zn, right_shift_dist);
+      break;
+    case "uqshrnt_z_zi"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "uqshrnb_z_zi"_h:
+      uqshrn(vform, result, zn, right_shift_dist);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+
+  if (top) {
+    // Keep even elements, replace odd elements with the results.
+    xtn(vform, zd, zd);
+    zip1(vform, zd, zd, result);
+  } else {
+    // Zero odd elements, replace even elements with the results.
+    SimVRegister zero;
+    zero.Clear();
+    zip1(vform, zd, result, zero);
+  }
+}
+
+// SVE2 widening arithmetic on interleaved elements: absolute-difference,
+// add and subtract long (SABDL/UABDL, SADDL/UADDL, SSUBL/USUBL) and their
+// accumulating forms (SABAL/UABAL), selecting bottom ("b"), top ("t") or
+// mixed ("bt"/"tb") source elements.
+void Simulator::SimulateSVEInterleavedArithLong(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  // NOTE(review): `temp` is declared but never used in this function.
+  SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
+
+  // Construct temporary registers containing the even (bottom) and odd (top)
+  // elements.
+  VectorFormat vform_half = VectorFormatHalfWidth(vform);
+  pack_even_elements(vform_half, zn_b, zn);
+  pack_even_elements(vform_half, zm_b, zm);
+  pack_odd_elements(vform_half, zn_t, zn);
+  pack_odd_elements(vform_half, zm_t, zm);
+
+  switch (form_hash_) {
+    case "sabdlb_z_zz"_h:
+      sabdl(vform, zd, zn_b, zm_b);
+      break;
+    case "sabdlt_z_zz"_h:
+      sabdl(vform, zd, zn_t, zm_t);
+      break;
+    case "saddlb_z_zz"_h:
+      saddl(vform, zd, zn_b, zm_b);
+      break;
+    case "saddlbt_z_zz"_h:
+      saddl(vform, zd, zn_b, zm_t);
+      break;
+    case "saddlt_z_zz"_h:
+      saddl(vform, zd, zn_t, zm_t);
+      break;
+    case "ssublb_z_zz"_h:
+      ssubl(vform, zd, zn_b, zm_b);
+      break;
+    case "ssublbt_z_zz"_h:
+      ssubl(vform, zd, zn_b, zm_t);
+      break;
+    case "ssublt_z_zz"_h:
+      ssubl(vform, zd, zn_t, zm_t);
+      break;
+    case "ssubltb_z_zz"_h:
+      ssubl(vform, zd, zn_t, zm_b);
+      break;
+    case "uabdlb_z_zz"_h:
+      uabdl(vform, zd, zn_b, zm_b);
+      break;
+    case "uabdlt_z_zz"_h:
+      uabdl(vform, zd, zn_t, zm_t);
+      break;
+    case "uaddlb_z_zz"_h:
+      uaddl(vform, zd, zn_b, zm_b);
+      break;
+    case "uaddlt_z_zz"_h:
+      uaddl(vform, zd, zn_t, zm_t);
+      break;
+    case "usublb_z_zz"_h:
+      usubl(vform, zd, zn_b, zm_b);
+      break;
+    case "usublt_z_zz"_h:
+      usubl(vform, zd, zn_t, zm_t);
+      break;
+    case "sabalb_z_zzz"_h:
+      sabal(vform, zd, zn_b, zm_b);
+      break;
+    case "sabalt_z_zzz"_h:
+      sabal(vform, zd, zn_t, zm_t);
+      break;
+    case "uabalb_z_zzz"_h:
+      uabal(vform, zd, zn_b, zm_b);
+      break;
+    case "uabalt_z_zzz"_h:
+      uabal(vform, zd, zn_t, zm_t);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 widening multiplies (vector form): PMULL, SMULL, UMULL and SQDMULL,
+// bottom ("b") and top ("t") element variants.
+void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
+  // Split both sources into even (bottom) and odd (top) half-width elements.
+  VectorFormat vform_half = VectorFormatHalfWidth(vform);
+  pack_even_elements(vform_half, zn_b, zn);
+  pack_even_elements(vform_half, zm_b, zm);
+  pack_odd_elements(vform_half, zn_t, zn);
+  pack_odd_elements(vform_half, zm_t, zm);
+
+  switch (form_hash_) {
+    case "pmullb_z_zz"_h:
+      // '00' is reserved for Q-sized lane.
+      if (vform == kFormatVnB) {
+        VIXL_UNIMPLEMENTED();
+      }
+      pmull(vform, zd, zn_b, zm_b);
+      break;
+    case "pmullt_z_zz"_h:
+      // '00' is reserved for Q-sized lane.
+      if (vform == kFormatVnB) {
+        VIXL_UNIMPLEMENTED();
+      }
+      pmull(vform, zd, zn_t, zm_t);
+      break;
+    case "smullb_z_zz"_h:
+      smull(vform, zd, zn_b, zm_b);
+      break;
+    case "smullt_z_zz"_h:
+      smull(vform, zd, zn_t, zm_t);
+      break;
+    case "sqdmullb_z_zz"_h:
+      sqdmull(vform, zd, zn_b, zm_b);
+      break;
+    case "sqdmullt_z_zz"_h:
+      sqdmull(vform, zd, zn_t, zm_t);
+      break;
+    case "umullb_z_zz"_h:
+      umull(vform, zd, zn_b, zm_b);
+      break;
+    case "umullt_z_zz"_h:
+      umull(vform, zd, zn_t, zm_t);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 narrowing high-half add/subtract (ADDHN/RADDHN/SUBHN/RSUBHN, B and T
+// forms): take the high half of each wide sum/difference. "B" forms write
+// even destination lanes and zero the odd ones; "T" forms write odd lanes
+// and preserve the even ones.
+void Simulator::SimulateSVEAddSubHigh(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+  bool top = false;
+
+  // The encoded format is the wide (source) format; B is reserved.
+  VectorFormat vform_src = instr->GetSVEVectorFormat();
+  if (vform_src == kFormatVnB) {
+    VIXL_UNIMPLEMENTED();
+  }
+  VectorFormat vform = VectorFormatHalfWidth(vform_src);
+
+  switch (form_hash_) {
+    case "addhnt_z_zz"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "addhnb_z_zz"_h:
+      addhn(vform, result, zn, zm);
+      break;
+    case "raddhnt_z_zz"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "raddhnb_z_zz"_h:
+      raddhn(vform, result, zn, zm);
+      break;
+    case "rsubhnt_z_zz"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "rsubhnb_z_zz"_h:
+      rsubhn(vform, result, zn, zm);
+      break;
+    case "subhnt_z_zz"_h:
+      top = true;
+      VIXL_FALLTHROUGH();
+    case "subhnb_z_zz"_h:
+      subhn(vform, result, zn, zm);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+
+  if (top) {
+    // Keep even elements, replace odd elements with the results.
+    xtn(vform, zd, zd);
+    zip1(vform, zd, zd, result);
+  } else {
+    // Zero odd elements, replace even elements with the results.
+    SimVRegister zero;
+    zero.Clear();
+    zip1(vform, zd, result, zero);
+  }
+}
+
+// SVE2 SSHLL/USHLL (B and T forms): widening shift-left by immediate of the
+// even (bottom) or odd (top) source elements. The immediate decodes as a
+// right-shift distance over the narrow lane; it is converted to the
+// equivalent left-shift distance, and the destination format is one step
+// wider than the encoded lane size (hence lane_size + 1).
+void Simulator::SimulateSVEShiftLeftImm(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister zn_b, zn_t;
+
+  std::pair<int, int> shift_and_lane_size =
+      instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+  int lane_size = shift_and_lane_size.second;
+  VIXL_ASSERT((lane_size >= 0) &&
+              (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
+  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size + 1);
+  int right_shift_dist = shift_and_lane_size.first;
+  int left_shift_dist = (8 << lane_size) - right_shift_dist;
+
+  // Construct temporary registers containing the even (bottom) and odd (top)
+  // elements.
+  VectorFormat vform_half = VectorFormatHalfWidth(vform);
+  pack_even_elements(vform_half, zn_b, zn);
+  pack_odd_elements(vform_half, zn_t, zn);
+
+  switch (form_hash_) {
+    case "sshllb_z_zi"_h:
+      sshll(vform, zd, zn_b, left_shift_dist);
+      break;
+    case "sshllt_z_zi"_h:
+      sshll(vform, zd, zn_t, left_shift_dist);
+      break;
+    case "ushllb_z_zi"_h:
+      ushll(vform, zd, zn_b, left_shift_dist);
+      break;
+    case "ushllt_z_zi"_h:
+      ushll(vform, zd, zn_t, left_shift_dist);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 SQRDMLAH/SQRDMLSH, vector and indexed forms. For the indexed forms
+// the lane format, element index and the (narrower) zm register number are
+// decoded from the instruction bits; index stays -1 for the vector forms.
+void Simulator::SimulateSVESaturatingMulAddHigh(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  unsigned zm_code = instr->GetRm();
+  int index = -1;
+  bool is_mla = false;
+
+  switch (form_hash_) {
+    case "sqrdmlah_z_zzz"_h:
+      is_mla = true;
+      VIXL_FALLTHROUGH();
+    case "sqrdmlsh_z_zzz"_h:
+      // Nothing to do.
+      break;
+    case "sqrdmlah_z_zzzi_h"_h:
+      is_mla = true;
+      VIXL_FALLTHROUGH();
+    case "sqrdmlsh_z_zzzi_h"_h:
+      vform = kFormatVnH;
+      index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
+      zm_code = instr->ExtractBits(18, 16);
+      break;
+    case "sqrdmlah_z_zzzi_s"_h:
+      is_mla = true;
+      VIXL_FALLTHROUGH();
+    case "sqrdmlsh_z_zzzi_s"_h:
+      vform = kFormatVnS;
+      index = instr->ExtractBits(20, 19);
+      zm_code = instr->ExtractBits(18, 16);
+      break;
+    case "sqrdmlah_z_zzzi_d"_h:
+      is_mla = true;
+      VIXL_FALLTHROUGH();
+    case "sqrdmlsh_z_zzzi_d"_h:
+      vform = kFormatVnD;
+      index = instr->ExtractBit(20);
+      zm_code = instr->ExtractBits(19, 16);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+
+  // For indexed forms, broadcast the selected element within each segment.
+  SimVRegister& zm = ReadVRegister(zm_code);
+  SimVRegister zm_idx;
+  if (index >= 0) {
+    dup_elements_to_segments(vform, zm_idx, zm, index);
+  }
+
+  if (is_mla) {
+    sqrdmlah(vform, zda, zn, (index >= 0) ? zm_idx : zm);
+  } else {
+    sqrdmlsh(vform, zda, zn, (index >= 0) ? zm_idx : zm);
+  }
+}
+
+// SVE2 SQDMLAL/SQDMLSL (indexed, S -> D): saturating doubling widening
+// multiply-accumulate of zn's bottom/top S elements by a broadcast zm
+// element, into D-lane accumulator zda.
+void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16));
+
+  SimVRegister temp, zm_idx, zn_b, zn_t;
+  // The 2-bit index is split across bits 20 and 11 of the encoding.
+  Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11);
+  dup_elements_to_segments(kFormatVnS, temp, zm, index);
+  pack_even_elements(kFormatVnS, zm_idx, temp);
+  pack_even_elements(kFormatVnS, zn_b, zn);
+  pack_odd_elements(kFormatVnS, zn_t, zn);
+
+  switch (form_hash_) {
+    case "sqdmlalb_z_zzzi_d"_h:
+      sqdmlal(kFormatVnD, zda, zn_b, zm_idx);
+      break;
+    case "sqdmlalt_z_zzzi_d"_h:
+      sqdmlal(kFormatVnD, zda, zn_t, zm_idx);
+      break;
+    case "sqdmlslb_z_zzzi_d"_h:
+      sqdmlsl(kFormatVnD, zda, zn_b, zm_idx);
+      break;
+    case "sqdmlslt_z_zzzi_d"_h:
+      sqdmlsl(kFormatVnD, zda, zn_t, zm_idx);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 FMLALB/FMLALT/FMLSLB/FMLSLT (vector): half-precision widening float
+// multiply-accumulate/-subtract of bottom or top H elements into S lanes.
+void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) {
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
+  pack_even_elements(kFormatVnH, zn_b, zn);
+  pack_even_elements(kFormatVnH, zm_b, zm);
+  pack_odd_elements(kFormatVnH, zn_t, zn);
+  pack_odd_elements(kFormatVnH, zm_t, zm);
+
+  switch (form_hash_) {
+    case "fmlalb_z_zzz"_h:
+      fmlal(kFormatVnS, zda, zn_b, zm_b);
+      break;
+    case "fmlalt_z_zzz"_h:
+      fmlal(kFormatVnS, zda, zn_t, zm_t);
+      break;
+    case "fmlslb_z_zzz"_h:
+      fmlsl(kFormatVnS, zda, zn_b, zm_b);
+      break;
+    case "fmlslt_z_zzz"_h:
+      fmlsl(kFormatVnS, zda, zn_t, zm_t);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// Indexed H -> S widening multiply-accumulates: FMLAL/FMLSL and
+// SQDMLAL/SQDMLSL (B and T forms), with zm's indexed H element broadcast
+// per segment.
+void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16));
+
+  SimVRegister temp, zm_idx, zn_b, zn_t;
+  // The 3-bit index is split across bits 20:19 and bit 11 of the encoding.
+  Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11);
+  dup_elements_to_segments(kFormatVnH, temp, zm, index);
+  pack_even_elements(kFormatVnH, zm_idx, temp);
+  pack_even_elements(kFormatVnH, zn_b, zn);
+  pack_odd_elements(kFormatVnH, zn_t, zn);
+
+  switch (form_hash_) {
+    case "fmlalb_z_zzzi_s"_h:
+      fmlal(kFormatVnS, zda, zn_b, zm_idx);
+      break;
+    case "fmlalt_z_zzzi_s"_h:
+      fmlal(kFormatVnS, zda, zn_t, zm_idx);
+      break;
+    case "fmlslb_z_zzzi_s"_h:
+      fmlsl(kFormatVnS, zda, zn_b, zm_idx);
+      break;
+    case "fmlslt_z_zzzi_s"_h:
+      fmlsl(kFormatVnS, zda, zn_t, zm_idx);
+      break;
+    case "sqdmlalb_z_zzzi_s"_h:
+      sqdmlal(kFormatVnS, zda, zn_b, zm_idx);
+      break;
+    case "sqdmlalt_z_zzzi_s"_h:
+      sqdmlal(kFormatVnS, zda, zn_t, zm_idx);
+      break;
+    case "sqdmlslb_z_zzzi_s"_h:
+      sqdmlsl(kFormatVnS, zda, zn_b, zm_idx);
+      break;
+    case "sqdmlslt_z_zzzi_s"_h:
+      sqdmlsl(kFormatVnS, zda, zn_t, zm_idx);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 SADALP/UADALP: signed/unsigned add and accumulate long pairwise,
+// with merging predication.
+// NOTE(review): `result` is passed to sadalp/uadalp without visibly being
+// seeded from zda here - confirm the helper reads the accumulator from its
+// destination operand, otherwise the accumulation would be lost.
+void Simulator::Simulate_ZdaT_PgM_ZnTb(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  SimVRegister result;
+
+  switch (form_hash_) {
+    case "sadalp_z_p_z"_h:
+      sadalp(vform, result, zn);
+      break;
+    case "uadalp_z_p_z"_h:
+      uadalp(vform, result, zn);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+  mov_merging(vform, zda, pg, result);
+}
+
+// SVE2 ADCLB/ADCLT/SBCLB/SBCLT: add/subtract with carry long, bottom or top
+// lanes. Subtraction is implemented as addition of the bitwise complement
+// of zn. Lane size (S or D) comes from bit 22.
+void Simulator::SimulateSVEAddSubCarry(const Instruction* instr) {
+  VectorFormat vform = (instr->ExtractBit(22) == 0) ? kFormatVnS : kFormatVnD;
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  SimVRegister not_zn;
+  not_(vform, not_zn, zn);
+
+  switch (form_hash_) {
+    case "adclb_z_zzz"_h:
+      adcl(vform, zda, zn, zm, /* top = */ false);
+      break;
+    case "adclt_z_zzz"_h:
+      adcl(vform, zda, zn, zm, /* top = */ true);
+      break;
+    case "sbclb_z_zzz"_h:
+      adcl(vform, zda, not_zn, zm, /* top = */ false);
+      break;
+    case "sbclt_z_zzz"_h:
+      adcl(vform, zda, not_zn, zm, /* top = */ true);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 SABA/UABA: signed/unsigned absolute difference and accumulate.
+void Simulator::Simulate_ZdaT_ZnT_ZmT(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  switch (form_hash_) {
+    case "saba_z_zzz"_h:
+      saba(vform, zda, zn, zm);
+      break;
+    case "uaba_z_zzz"_h:
+      uaba(vform, zda, zn, zm);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 CMLA and SQRDCMLAH, vector and indexed forms: complex integer
+// multiply-add with rotation (0/90/180/270 degrees, from bits 11:10).
+void Simulator::SimulateSVEComplexIntMulAdd(const Instruction* instr) {
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+  int rot = instr->ExtractBits(11, 10) * 90;
+  // vform and zm are only valid for the vector form of instruction.
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+  // Inputs for indexed form of instruction.
+  SimVRegister& zm_h = ReadVRegister(instr->ExtractBits(18, 16));
+  SimVRegister& zm_s = ReadVRegister(instr->ExtractBits(19, 16));
+  int idx_h = instr->ExtractBits(20, 19);
+  int idx_s = instr->ExtractBit(20);
+
+  switch (form_hash_) {
+    case "cmla_z_zzz"_h:
+      cmla(vform, zda, zda, zn, zm, rot);
+      break;
+    case "cmla_z_zzzi_h"_h:
+      cmla(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
+      break;
+    case "cmla_z_zzzi_s"_h:
+      cmla(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
+      break;
+    case "sqrdcmlah_z_zzz"_h:
+      sqrdcmlah(vform, zda, zda, zn, zm, rot);
+      break;
+    case "sqrdcmlah_z_zzzi_h"_h:
+      sqrdcmlah(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
+      break;
+    case "sqrdcmlah_z_zzzi_s"_h:
+      sqrdcmlah(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 shift-right and accumulate by immediate: SRSRA/SSRA/URSRA/USRA.
+// The destination register is also the accumulator.
+void Simulator::Simulate_ZdaT_ZnT_const(const Instruction* instr) {
+  SimVRegister& zd = ReadVRegister(instr->GetRd());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  std::pair<int, int> shift_and_lane_size =
+      instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+  int lane_size = shift_and_lane_size.second;
+  VIXL_ASSERT((lane_size >= 0) &&
+              (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
+  VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+  int shift_dist = shift_and_lane_size.first;
+
+  switch (form_hash_) {
+    case "srsra_z_zi"_h:
+      srsra(vform, zd, zn, shift_dist);
+      break;
+    case "ssra_z_zi"_h:
+      ssra(vform, zd, zn, shift_dist);
+      break;
+    case "ursra_z_zi"_h:
+      ursra(vform, zd, zn, shift_dist);
+      break;
+    case "usra_z_zi"_h:
+      usra(vform, zd, zn, shift_dist);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+// SVE2 widening multiply-accumulate family (vector forms): SMLAL/SMLSL,
+// UMLAL/UMLSL and saturating SQDMLAL/SQDMLSL, including the mixed "bt"
+// variants. Bottom/top half-width elements are extracted with uzp1/uzp2
+// against a zero vector (equivalent to pack even/odd).
+void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) {
+  VectorFormat vform = instr->GetSVEVectorFormat();
+  SimVRegister& zda = ReadVRegister(instr->GetRd());
+  SimVRegister& zm = ReadVRegister(instr->GetRm());
+  SimVRegister& zn = ReadVRegister(instr->GetRn());
+
+  SimVRegister zero, zn_b, zm_b, zn_t, zm_t;
+  zero.Clear();
+
+  VectorFormat vform_half = VectorFormatHalfWidth(vform);
+  uzp1(vform_half, zn_b, zn, zero);
+  uzp1(vform_half, zm_b, zm, zero);
+  uzp2(vform_half, zn_t, zn, zero);
+  uzp2(vform_half, zm_t, zm, zero);
+
+  switch (form_hash_) {
+    case "smlalb_z_zzz"_h:
+      smlal(vform, zda, zn_b, zm_b);
+      break;
+    case "smlalt_z_zzz"_h:
+      smlal(vform, zda, zn_t, zm_t);
+      break;
+    case "smlslb_z_zzz"_h:
+      smlsl(vform, zda, zn_b, zm_b);
+      break;
+    case "smlslt_z_zzz"_h:
+      smlsl(vform, zda, zn_t, zm_t);
+      break;
+    case "sqdmlalb_z_zzz"_h:
+      sqdmlal(vform, zda, zn_b, zm_b);
+      break;
+    case "sqdmlalbt_z_zzz"_h:
+      sqdmlal(vform, zda, zn_b, zm_t);
+      break;
+    case "sqdmlalt_z_zzz"_h:
+      sqdmlal(vform, zda, zn_t, zm_t);
+      break;
+    case "sqdmlslb_z_zzz"_h:
+      sqdmlsl(vform, zda, zn_b, zm_b);
+      break;
+    case "sqdmlslbt_z_zzz"_h:
+      sqdmlsl(vform, zda, zn_b, zm_t);
+      break;
+    case "sqdmlslt_z_zzz"_h:
+      sqdmlsl(vform, zda, zn_t, zm_t);
+      break;
+    case "umlalb_z_zzz"_h:
+      umlal(vform, zda, zn_b, zm_b);
+      break;
+    case "umlalt_z_zzz"_h:
+      umlal(vform, zda, zn_t, zm_t);
+      break;
+    case "umlslb_z_zzz"_h:
+      umlsl(vform, zda, zn_b, zm_b);
+      break;
+    case "umlslt_z_zzz"_h:
+      umlsl(vform, zda, zn_t, zm_t);
+      break;
+    default:
+      VIXL_UNIMPLEMENTED();
+  }
+}
+
+void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zda = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ int rot = instr->ExtractBits(11, 10) * 90;
+ unsigned zm_code = instr->GetRm();
+ int index = -1;
+
+ switch (form_hash_) {
+ case "cdot_z_zzz"_h:
+ // Nothing to do.
+ break;
+ case "cdot_z_zzzi_s"_h:
+ index = zm_code >> 3;
+ zm_code &= 0x7;
+ break;
+ case "cdot_z_zzzi_d"_h:
+ index = zm_code >> 4;
+ zm_code &= 0xf;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+
+ SimVRegister temp;
+ SimVRegister& zm = ReadVRegister(zm_code);
+ if (index >= 0) dup_elements_to_segments(vform, temp, zm, index);
+ cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot);
+}
+
+void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) {
+ VectorFormat vform = kFormatVnD;
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+ SimVRegister& zk = ReadVRegister(instr->GetRn());
+ SimVRegister temp;
+
+ switch (form_hash_) {
+ case "bcax_z_zzz"_h:
+ bic(vform, temp, zm, zk);
+ eor(vform, zdn, temp, zdn);
+ break;
+ case "bsl1n_z_zzz"_h:
+ not_(vform, temp, zdn);
+ bsl(vform, zdn, zk, temp, zm);
+ break;
+ case "bsl2n_z_zzz"_h:
+ not_(vform, temp, zm);
+ bsl(vform, zdn, zk, zdn, temp);
+ break;
+ case "bsl_z_zzz"_h:
+ bsl(vform, zdn, zk, zdn, zm);
+ break;
+ case "eor3_z_zzz"_h:
+ eor(vform, temp, zdn, zm);
+ eor(vform, zdn, temp, zk);
+ break;
+ case "nbsl_z_zzz"_h:
+ bsl(vform, zdn, zk, zdn, zm);
+ not_(vform, zdn, zdn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+void Simulator::SimulateSVEHalvingAddSub(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimVRegister result;
+
+ switch (form_hash_) {
+ case "shadd_z_p_zz"_h:
+ add(vform, result, zdn, zm).Halve(vform);
+ break;
+ case "shsub_z_p_zz"_h:
+ sub(vform, result, zdn, zm).Halve(vform);
+ break;
+ case "shsubr_z_p_zz"_h:
+ sub(vform, result, zm, zdn).Halve(vform);
+ break;
+ case "srhadd_z_p_zz"_h:
+ add(vform, result, zdn, zm).Halve(vform).Round(vform);
+ break;
+ case "uhadd_z_p_zz"_h:
+ add(vform, result, zdn, zm).Uhalve(vform);
+ break;
+ case "uhsub_z_p_zz"_h:
+ sub(vform, result, zdn, zm).Uhalve(vform);
+ break;
+ case "uhsubr_z_p_zz"_h:
+ sub(vform, result, zm, zdn).Uhalve(vform);
+ break;
+ case "urhadd_z_p_zz"_h:
+ add(vform, result, zdn, zm).Uhalve(vform).Round(vform);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::SimulateSVESaturatingArithmetic(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister result;
+
+ switch (form_hash_) {
+ case "sqadd_z_p_zz"_h:
+ add(vform, result, zdn, zm).SignedSaturate(vform);
+ break;
+ case "sqsub_z_p_zz"_h:
+ sub(vform, result, zdn, zm).SignedSaturate(vform);
+ break;
+ case "sqsubr_z_p_zz"_h:
+ sub(vform, result, zm, zdn).SignedSaturate(vform);
+ break;
+ case "suqadd_z_p_zz"_h:
+ suqadd(vform, result, zdn, zm);
+ break;
+ case "uqadd_z_p_zz"_h:
+ add(vform, result, zdn, zm).UnsignedSaturate(vform);
+ break;
+ case "uqsub_z_p_zz"_h:
+ sub(vform, result, zdn, zm).UnsignedSaturate(vform);
+ break;
+ case "uqsubr_z_p_zz"_h:
+ sub(vform, result, zm, zdn).UnsignedSaturate(vform);
+ break;
+ case "usqadd_z_p_zz"_h:
+ usqadd(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::SimulateSVEIntArithPair(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimVRegister result;
+
+ switch (form_hash_) {
+ case "addp_z_p_zz"_h:
+ addp(vform, result, zdn, zm);
+ break;
+ case "smaxp_z_p_zz"_h:
+ smaxp(vform, result, zdn, zm);
+ break;
+ case "sminp_z_p_zz"_h:
+ sminp(vform, result, zdn, zm);
+ break;
+ case "umaxp_z_p_zz"_h:
+ umaxp(vform, result, zdn, zm);
+ break;
+ case "uminp_z_p_zz"_h:
+ uminp(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimVRegister result;
+
+ switch (form_hash_) {
+ case "faddp_z_p_zz"_h:
+ faddp(vform, result, zdn, zm);
+ break;
+ case "fmaxnmp_z_p_zz"_h:
+ fmaxnmp(vform, result, zdn, zm);
+ break;
+ case "fmaxp_z_p_zz"_h:
+ fmaxp(vform, result, zdn, zm);
+ break;
+ case "fminnmp_z_p_zz"_h:
+ fminnmp(vform, result, zdn, zm);
+ break;
+ case "fminp_z_p_zz"_h:
+ fminp(vform, result, zdn, zm);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
+ unsigned lane_size = shift_and_lane_size.second;
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+ int right_shift_dist = shift_and_lane_size.first;
+ int left_shift_dist = (8 << lane_size) - right_shift_dist;
+ SimVRegister result;
+
+ switch (form_hash_) {
+ case "sqshl_z_p_zi"_h:
+ sqshl(vform, result, zdn, left_shift_dist);
+ break;
+ case "sqshlu_z_p_zi"_h:
+ sqshlu(vform, result, zdn, left_shift_dist);
+ break;
+ case "srshr_z_p_zi"_h:
+ sshr(vform, result, zdn, right_shift_dist).Round(vform);
+ break;
+ case "uqshl_z_p_zi"_h:
+ uqshl(vform, result, zdn, left_shift_dist);
+ break;
+ case "urshr_z_p_zi"_h:
+ ushr(vform, result, zdn, right_shift_dist).Round(vform);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ mov_merging(vform, zdn, pg, result);
+}
+
+void Simulator::SimulateSVEExclusiveOrRotate(const Instruction* instr) {
+ VIXL_ASSERT(form_hash_ == "xar_z_zzi"_h);
+
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+
+ std::pair<int, int> shift_and_lane_size =
+ instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
+ unsigned lane_size = shift_and_lane_size.second;
+ VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
+ int shift_dist = shift_and_lane_size.first;
+ eor(vform, zdn, zdn, zm);
+ ror(vform, zdn, zdn, shift_dist);
+}
+
+void Simulator::Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zm = ReadVRegister(instr->GetRn());
+ int rot = (instr->ExtractBit(10) == 0) ? 90 : 270;
+
+ switch (form_hash_) {
+ case "cadd_z_zz"_h:
+ cadd(vform, zdn, zdn, zm, rot);
+ break;
+ case "sqcadd_z_zz"_h:
+ cadd(vform, zdn, zdn, zm, rot, /* saturate = */ true);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+}
+
+void Simulator::Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ uint64_t xm = ReadXRegister(instr->GetRm());
+
+ LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
+ int msize = -1;
+ bool is_signed = false;
+
+ switch (form_hash_) {
+ case "ldnt1b_z_p_ar_d_64_unscaled"_h:
+ msize = 0;
+ break;
+ case "ldnt1d_z_p_ar_d_64_unscaled"_h:
+ msize = 3;
+ break;
+ case "ldnt1h_z_p_ar_d_64_unscaled"_h:
+ msize = 1;
+ break;
+ case "ldnt1sb_z_p_ar_d_64_unscaled"_h:
+ msize = 0;
+ is_signed = true;
+ break;
+ case "ldnt1sh_z_p_ar_d_64_unscaled"_h:
+ msize = 1;
+ is_signed = true;
+ break;
+ case "ldnt1sw_z_p_ar_d_64_unscaled"_h:
+ msize = 2;
+ is_signed = true;
+ break;
+ case "ldnt1w_z_p_ar_d_64_unscaled"_h:
+ msize = 2;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ addr.SetMsizeInBytesLog2(msize);
+ SVEStructuredLoadHelper(kFormatVnD, pg, instr->GetRt(), addr, is_signed);
+}
+
+void Simulator::Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ uint64_t xm = ReadXRegister(instr->GetRm());
+
+ LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
+ VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_d_64_unscaled"_h) ||
+ (form_hash_ == "stnt1d_z_p_ar_d_64_unscaled"_h) ||
+ (form_hash_ == "stnt1h_z_p_ar_d_64_unscaled"_h) ||
+ (form_hash_ == "stnt1w_z_p_ar_d_64_unscaled"_h));
+
+ addr.SetMsizeInBytesLog2(
+ instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
+ SVEStructuredStoreHelper(kFormatVnD, pg, instr->GetRt(), addr);
+}
+
+void Simulator::Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ uint64_t xm = ReadXRegister(instr->GetRm());
+
+ LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
+ int msize = -1;
+ bool is_signed = false;
+
+ switch (form_hash_) {
+ case "ldnt1b_z_p_ar_s_x32_unscaled"_h:
+ msize = 0;
+ break;
+ case "ldnt1h_z_p_ar_s_x32_unscaled"_h:
+ msize = 1;
+ break;
+ case "ldnt1sb_z_p_ar_s_x32_unscaled"_h:
+ msize = 0;
+ is_signed = true;
+ break;
+ case "ldnt1sh_z_p_ar_s_x32_unscaled"_h:
+ msize = 1;
+ is_signed = true;
+ break;
+ case "ldnt1w_z_p_ar_s_x32_unscaled"_h:
+ msize = 2;
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ addr.SetMsizeInBytesLog2(msize);
+ SVEStructuredLoadHelper(kFormatVnS, pg, instr->GetRt(), addr, is_signed);
+}
+
+void Simulator::Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr) {
+ SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ uint64_t xm = ReadXRegister(instr->GetRm());
+
+ LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
+ VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_s_x32_unscaled"_h) ||
+ (form_hash_ == "stnt1h_z_p_ar_s_x32_unscaled"_h) ||
+ (form_hash_ == "stnt1w_z_p_ar_s_x32_unscaled"_h));
+
+ addr.SetMsizeInBytesLog2(
+ instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
+ SVEStructuredStoreHelper(kFormatVnS, pg, instr->GetRt(), addr);
+}
+
void Simulator::VisitReserved(const Instruction* instr) {
// UDF is the only instruction in this group, and the Decoder is precise here.
VIXL_ASSERT(instr->Mask(ReservedMask) == UDF);
@@ -2848,11 +4811,17 @@ void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
__sync_synchronize();
}
- MemWrite<T>(address, result);
WriteRegister<T>(rt, data, NoRegLog);
- PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
- LogRead(rt, format, address);
+ unsigned register_size = element_size;
+ if (element_size < kXRegSizeInBytes) {
+ register_size = kWRegSizeInBytes;
+ }
+ PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size);
+ LogExtendingRead(rt, format, element_size, address);
+
+ MemWrite<T>(address, result);
+ format = GetPrintRegisterFormatForSize(element_size);
LogWrite(rs, format, address);
}
@@ -3162,42 +5131,71 @@ void Simulator::VisitConditionalSelect(const Instruction* instr) {
}
-// clang-format off
-#define PAUTH_MODES(V) \
- V(IA, ReadXRegister(src), kPACKeyIA, kInstructionPointer) \
- V(IB, ReadXRegister(src), kPACKeyIB, kInstructionPointer) \
- V(IZA, 0x00000000, kPACKeyIA, kInstructionPointer) \
- V(IZB, 0x00000000, kPACKeyIB, kInstructionPointer) \
- V(DA, ReadXRegister(src), kPACKeyDA, kDataPointer) \
- V(DB, ReadXRegister(src), kPACKeyDB, kDataPointer) \
- V(DZA, 0x00000000, kPACKeyDA, kDataPointer) \
- V(DZB, 0x00000000, kPACKeyDB, kDataPointer)
-// clang-format on
+#define PAUTH_MODES_REGISTER_CONTEXT(V) \
+ V(IA, kPACKeyIA, kInstructionPointer) \
+ V(IB, kPACKeyIB, kInstructionPointer) \
+ V(DA, kPACKeyDA, kDataPointer) \
+ V(DB, kPACKeyDB, kDataPointer)
+
+#define PAUTH_MODES_ZERO_CONTEXT(V) \
+ V(IZA, kPACKeyIA, kInstructionPointer) \
+ V(IZB, kPACKeyIB, kInstructionPointer) \
+ V(DZA, kPACKeyDA, kDataPointer) \
+ V(DZB, kPACKeyDB, kDataPointer)
void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
unsigned dst = instr->GetRd();
unsigned src = instr->GetRn();
switch (instr->Mask(DataProcessing1SourceMask)) {
-#define DEFINE_PAUTH_FUNCS(SUFFIX, MOD, KEY, D) \
+#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D) \
case PAC##SUFFIX: { \
+ uint64_t mod = ReadXRegister(src); \
uint64_t ptr = ReadXRegister(dst); \
- WriteXRegister(dst, AddPAC(ptr, MOD, KEY, D)); \
+ WriteXRegister(dst, AddPAC(ptr, mod, KEY, D)); \
break; \
} \
case AUT##SUFFIX: { \
+ uint64_t mod = ReadXRegister(src); \
uint64_t ptr = ReadXRegister(dst); \
- WriteXRegister(dst, AuthPAC(ptr, MOD, KEY, D)); \
+ WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
break; \
}
- PAUTH_MODES(DEFINE_PAUTH_FUNCS)
+ PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
+#undef DEFINE_PAUTH_FUNCS
+
+#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D) \
+ case PAC##SUFFIX: { \
+ if (src != kZeroRegCode) { \
+ VIXL_UNIMPLEMENTED(); \
+ } \
+ uint64_t ptr = ReadXRegister(dst); \
+ WriteXRegister(dst, AddPAC(ptr, 0x0, KEY, D)); \
+ break; \
+ } \
+ case AUT##SUFFIX: { \
+ if (src != kZeroRegCode) { \
+ VIXL_UNIMPLEMENTED(); \
+ } \
+ uint64_t ptr = ReadXRegister(dst); \
+ WriteXRegister(dst, AuthPAC(ptr, 0x0, KEY, D)); \
+ break; \
+ }
+
+ PAUTH_MODES_ZERO_CONTEXT(DEFINE_PAUTH_FUNCS)
#undef DEFINE_PAUTH_FUNCS
case XPACI:
+ if (src != kZeroRegCode) {
+ VIXL_UNIMPLEMENTED();
+ }
WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
break;
case XPACD:
+ if (src != kZeroRegCode) {
+ VIXL_UNIMPLEMENTED();
+ }
WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
break;
case RBIT_w:
@@ -3471,6 +5469,15 @@ void Simulator::VisitBitfield(const Instruction* instr) {
int64_t reg_mask = instr->GetSixtyFourBits() ? kXRegMask : kWRegMask;
int R = instr->GetImmR();
int S = instr->GetImmS();
+
+ if (instr->GetSixtyFourBits() != instr->GetBitN()) {
+ VisitUnallocated(instr);
+ }
+
+ if ((instr->GetSixtyFourBits() == 0) && ((S > 31) || (R > 31))) {
+ VisitUnallocated(instr);
+ }
+
int diff = S - R;
uint64_t mask;
if (diff >= 0) {
@@ -4701,10 +6708,10 @@ void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
rev16(vf, rd, rn);
break;
case NEON_SUQADD:
- suqadd(vf, rd, rn);
+ suqadd(vf, rd, rd, rn);
break;
case NEON_USQADD:
- usqadd(vf, rd, rn);
+ usqadd(vf, rd, rd, rn);
break;
case NEON_CLS:
cls(vf, rd, rn);
@@ -5086,7 +7093,7 @@ void Simulator::VisitNEON3Same(const Instruction* instr) {
bit(vf, rd, rn, rm);
break;
case NEON_BSL:
- bsl(vf, rd, rn, rm);
+ bsl(vf, rd, rd, rn, rm);
break;
default:
VIXL_UNIMPLEMENTED();
@@ -5394,30 +7401,31 @@ void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
SimVRegister& rm = ReadVRegister(instr->GetRm());
int rot = 0;
VectorFormat vf = nfd.GetVectorFormat();
- if (instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) {
- rot = instr->GetImmRotFcmlaVec();
- fcmla(vf, rd, rn, rm, rd, rot);
- } else if (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD) {
- rot = instr->GetImmRotFcadd();
- fcadd(vf, rd, rn, rm, rot);
- } else {
- switch (instr->Mask(NEON3SameExtraMask)) {
- case NEON_SDOT:
- sdot(vf, rd, rn, rm);
- break;
- case NEON_SQRDMLAH:
- sqrdmlah(vf, rd, rn, rm);
- break;
- case NEON_UDOT:
- udot(vf, rd, rn, rm);
- break;
- case NEON_SQRDMLSH:
- sqrdmlsh(vf, rd, rn, rm);
- break;
- default:
- VIXL_UNIMPLEMENTED();
- break;
- }
+
+ switch (form_hash_) {
+ case "fcmla_asimdsame2_c"_h:
+ rot = instr->GetImmRotFcmlaVec();
+ fcmla(vf, rd, rn, rm, rd, rot);
+ break;
+ case "fcadd_asimdsame2_c"_h:
+ rot = instr->GetImmRotFcadd();
+ fcadd(vf, rd, rn, rm, rot);
+ break;
+ case "sdot_asimdsame2_d"_h:
+ sdot(vf, rd, rn, rm);
+ break;
+ case "udot_asimdsame2_d"_h:
+ udot(vf, rd, rn, rm);
+ break;
+ case "usdot_asimdsame2_d"_h:
+ usdot(vf, rd, rn, rm);
+ break;
+ case "sqrdmlah_asimdsame2_only"_h:
+ sqrdmlah(vf, rd, rn, rm);
+ break;
+ case "sqrdmlsh_asimdsame2_only"_h:
+ sqrdmlsh(vf, rd, rn, rm);
+ break;
}
}
@@ -5671,206 +7679,225 @@ void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
}
}
-
-void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
+void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) {
NEONFormatDecoder nfd(instr);
- static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
- VectorFormat vf_r = nfd.GetVectorFormat();
- VectorFormat vf_half = nfd.GetVectorFormat(&map_half);
VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
SimVRegister& rd = ReadVRegister(instr->GetRd());
SimVRegister& rn = ReadVRegister(instr->GetRn());
- ByElementOp Op = NULL;
-
int rm_reg = instr->GetRm();
- int rm_low_reg = instr->GetRmLow16();
int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
- int index_hlm = (index << 1) | instr->GetNEONM();
-
- switch (instr->Mask(NEONByIndexedElementFPLongMask)) {
- // These are oddballs and are best handled as special cases.
- // - Rm is encoded with only 4 bits (and must be in the lower 16 registers).
- // - The index is always H:L:M.
- case NEON_FMLAL_H_byelement:
- fmlal(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm);
- return;
- case NEON_FMLAL2_H_byelement:
- fmlal2(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm);
- return;
- case NEON_FMLSL_H_byelement:
- fmlsl(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm);
- return;
- case NEON_FMLSL2_H_byelement:
- fmlsl2(vf_r, rd, rn, ReadVRegister(rm_low_reg), index_hlm);
- return;
- }
-
if (instr->GetNEONSize() == 1) {
- rm_reg = rm_low_reg;
- index = index_hlm;
+ rm_reg = instr->GetRmLow16();
+ index = (index << 1) | instr->GetNEONM();
}
+ SimVRegister& rm = ReadVRegister(rm_reg);
- switch (instr->Mask(NEONByIndexedElementMask)) {
- case NEON_MUL_byelement:
- Op = &Simulator::mul;
- vf = vf_r;
+ SimVRegister temp;
+ VectorFormat indexform =
+ VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf));
+ dup_element(indexform, temp, rm, index);
+
+ bool is_2 = instr->Mask(NEON_Q) ? true : false;
+
+ switch (form_hash_) {
+ case "smull_asimdelem_l"_h:
+ smull(vf, rd, rn, temp, is_2);
break;
- case NEON_MLA_byelement:
- Op = &Simulator::mla;
- vf = vf_r;
+ case "umull_asimdelem_l"_h:
+ umull(vf, rd, rn, temp, is_2);
break;
- case NEON_MLS_byelement:
- Op = &Simulator::mls;
- vf = vf_r;
+ case "smlal_asimdelem_l"_h:
+ smlal(vf, rd, rn, temp, is_2);
break;
- case NEON_SQDMULH_byelement:
- Op = &Simulator::sqdmulh;
- vf = vf_r;
+ case "umlal_asimdelem_l"_h:
+ umlal(vf, rd, rn, temp, is_2);
break;
- case NEON_SQRDMULH_byelement:
- Op = &Simulator::sqrdmulh;
- vf = vf_r;
+ case "smlsl_asimdelem_l"_h:
+ smlsl(vf, rd, rn, temp, is_2);
break;
- case NEON_SDOT_byelement:
- Op = &Simulator::sdot;
- vf = vf_r;
+ case "umlsl_asimdelem_l"_h:
+ umlsl(vf, rd, rn, temp, is_2);
break;
- case NEON_SQRDMLAH_byelement:
- Op = &Simulator::sqrdmlah;
- vf = vf_r;
+ case "sqdmull_asimdelem_l"_h:
+ sqdmull(vf, rd, rn, temp, is_2);
break;
- case NEON_UDOT_byelement:
- Op = &Simulator::udot;
- vf = vf_r;
+ case "sqdmlal_asimdelem_l"_h:
+ sqdmlal(vf, rd, rn, temp, is_2);
break;
- case NEON_SQRDMLSH_byelement:
- Op = &Simulator::sqrdmlsh;
- vf = vf_r;
+ case "sqdmlsl_asimdelem_l"_h:
+ sqdmlsl(vf, rd, rn, temp, is_2);
break;
- case NEON_SMULL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::smull2;
- } else {
- Op = &Simulator::smull;
- }
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+void Simulator::SimulateNEONFPMulByElementLong(const Instruction* instr) {
+ VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRmLow16());
+
+ int index =
+ (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
+
+ switch (form_hash_) {
+ case "fmlal_asimdelem_lh"_h:
+ fmlal(vform, rd, rn, rm, index);
break;
- case NEON_UMULL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::umull2;
- } else {
- Op = &Simulator::umull;
- }
+ case "fmlal2_asimdelem_lh"_h:
+ fmlal2(vform, rd, rn, rm, index);
break;
- case NEON_SMLAL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::smlal2;
- } else {
- Op = &Simulator::smlal;
- }
+ case "fmlsl_asimdelem_lh"_h:
+ fmlsl(vform, rd, rn, rm, index);
break;
- case NEON_UMLAL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::umlal2;
- } else {
- Op = &Simulator::umlal;
- }
+ case "fmlsl2_asimdelem_lh"_h:
+ fmlsl2(vform, rd, rn, rm, index);
break;
- case NEON_SMLSL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::smlsl2;
- } else {
- Op = &Simulator::smlsl;
- }
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+void Simulator::SimulateNEONFPMulByElement(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ static const NEONFormatMap map =
+ {{23, 22, 30},
+ {NF_4H, NF_8H, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
+ VectorFormat vform = nfd.GetVectorFormat(&map);
+
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+
+ int rm_reg = instr->GetRm();
+ int index =
+ (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
+
+ if ((vform == kFormat4H) || (vform == kFormat8H)) {
+ rm_reg &= 0xf;
+ } else if ((vform == kFormat2S) || (vform == kFormat4S)) {
+ index >>= 1;
+ } else {
+ VIXL_ASSERT(vform == kFormat2D);
+ VIXL_ASSERT(instr->GetNEONL() == 0);
+ index >>= 2;
+ }
+
+ SimVRegister& rm = ReadVRegister(rm_reg);
+
+ switch (form_hash_) {
+ case "fmul_asimdelem_rh_h"_h:
+ case "fmul_asimdelem_r_sd"_h:
+ fmul(vform, rd, rn, rm, index);
break;
- case NEON_UMLSL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::umlsl2;
- } else {
- Op = &Simulator::umlsl;
- }
+ case "fmla_asimdelem_rh_h"_h:
+ case "fmla_asimdelem_r_sd"_h:
+ fmla(vform, rd, rn, rm, index);
break;
- case NEON_SQDMULL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::sqdmull2;
- } else {
- Op = &Simulator::sqdmull;
- }
+ case "fmls_asimdelem_rh_h"_h:
+ case "fmls_asimdelem_r_sd"_h:
+ fmls(vform, rd, rn, rm, index);
break;
- case NEON_SQDMLAL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::sqdmlal2;
- } else {
- Op = &Simulator::sqdmlal;
- }
+ case "fmulx_asimdelem_rh_h"_h:
+ case "fmulx_asimdelem_r_sd"_h:
+ fmulx(vform, rd, rn, rm, index);
break;
- case NEON_SQDMLSL_byelement:
- if (instr->Mask(NEON_Q)) {
- Op = &Simulator::sqdmlsl2;
- } else {
- Op = &Simulator::sqdmlsl;
- }
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
+void Simulator::SimulateNEONComplexMulByElement(const Instruction* instr) {
+ VectorFormat vform = instr->GetNEONQ() ? kFormat8H : kFormat4H;
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRm());
+ int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
+
+ switch (form_hash_) {
+ case "fcmla_asimdelem_c_s"_h:
+ vform = kFormat4S;
+ index >>= 1;
+ VIXL_FALLTHROUGH();
+ case "fcmla_asimdelem_c_h"_h:
+ fcmla(vform, rd, rn, rm, index, instr->GetImmRotFcmlaSca());
break;
default:
- index = instr->GetNEONH();
- if (instr->GetFPType() == 0) {
- rm_reg &= 0xf;
- index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
- } else if ((instr->GetFPType() & 1) == 0) {
- index = (index << 1) | instr->GetNEONL();
- }
+ VIXL_UNREACHABLE();
+ }
+}
- vf = nfd.GetVectorFormat(nfd.FPFormatMap());
+void Simulator::SimulateNEONDotProdByElement(const Instruction* instr) {
+ VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
- switch (instr->Mask(NEONByIndexedElementFPMask)) {
- case NEON_FMUL_H_byelement:
- vf = vf_half;
- VIXL_FALLTHROUGH();
- case NEON_FMUL_byelement:
- Op = &Simulator::fmul;
- break;
- case NEON_FMLA_H_byelement:
- vf = vf_half;
- VIXL_FALLTHROUGH();
- case NEON_FMLA_byelement:
- Op = &Simulator::fmla;
- break;
- case NEON_FMLS_H_byelement:
- vf = vf_half;
- VIXL_FALLTHROUGH();
- case NEON_FMLS_byelement:
- Op = &Simulator::fmls;
- break;
- case NEON_FMULX_H_byelement:
- vf = vf_half;
- VIXL_FALLTHROUGH();
- case NEON_FMULX_byelement:
- Op = &Simulator::fmulx;
- break;
- default:
- if (instr->GetNEONSize() == 2) {
- index = instr->GetNEONH();
- } else {
- index = (instr->GetNEONH() << 1) | instr->GetNEONL();
- }
- switch (instr->Mask(NEONByIndexedElementFPComplexMask)) {
- case NEON_FCMLA_byelement:
- vf = vf_r;
- fcmla(vf,
- rd,
- rn,
- ReadVRegister(instr->GetRm()),
- index,
- instr->GetImmRotFcmlaSca());
- return;
- default:
- VIXL_UNIMPLEMENTED();
- }
- }
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRm());
+ int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
+
+ SimVRegister temp;
+ // NEON indexed `dot` allows the index value exceed the register size.
+ // Promote the format to Q-sized vector format before the duplication.
+ dup_elements_to_segments(VectorFormatFillQ(vform), temp, rm, index);
+
+ switch (form_hash_) {
+ case "sdot_asimdelem_d"_h:
+ sdot(vform, rd, rn, temp);
+ break;
+ case "udot_asimdelem_d"_h:
+ udot(vform, rd, rn, temp);
+ break;
+ case "sudot_asimdelem_d"_h:
+ usdot(vform, rd, temp, rn);
+ break;
+ case "usdot_asimdelem_d"_h:
+ usdot(vform, rd, rn, temp);
+ break;
}
+}
- (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index);
+void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ VectorFormat vform = nfd.GetVectorFormat();
+
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+
+ int rm_reg = instr->GetRm();
+ int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
+
+ if ((vform == kFormat4H) || (vform == kFormat8H)) {
+ rm_reg &= 0xf;
+ index = (index << 1) | instr->GetNEONM();
+ }
+
+ SimVRegister& rm = ReadVRegister(rm_reg);
+
+ switch (form_hash_) {
+ case "mul_asimdelem_r"_h:
+ mul(vform, rd, rn, rm, index);
+ break;
+ case "mla_asimdelem_r"_h:
+ mla(vform, rd, rn, rm, index);
+ break;
+ case "mls_asimdelem_r"_h:
+ mls(vform, rd, rn, rm, index);
+ break;
+ case "sqdmulh_asimdelem_r"_h:
+ sqdmulh(vform, rd, rn, rm, index);
+ break;
+ case "sqrdmulh_asimdelem_r"_h:
+ sqrdmulh(vform, rd, rn, rm, index);
+ break;
+ case "sqrdmlah_asimdelem_r"_h:
+ sqrdmlah(vform, rd, rn, rm, index);
+ break;
+ case "sqrdmlsh_asimdelem_r"_h:
+ sqrdmlsh(vform, rd, rn, rm, index);
+ break;
+ }
}
@@ -5882,11 +7909,11 @@ void Simulator::VisitNEONCopy(const Instruction* instr) {
SimVRegister& rn = ReadVRegister(instr->GetRn());
int imm5 = instr->GetImmNEON5();
int tz = CountTrailingZeros(imm5, 32);
- int reg_index = imm5 >> (tz + 1);
+ int reg_index = ExtractSignedBitfield32(31, tz + 1, imm5);
if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
int imm4 = instr->GetImmNEON4();
- int rn_index = imm4 >> tz;
+ int rn_index = ExtractSignedBitfield32(31, tz, imm4);
ins_element(vf, rd, reg_index, rn, rn_index);
} else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn()));
@@ -6485,10 +8512,10 @@ void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
neg(vf, rd, rn).SignedSaturate(vf);
break;
case NEON_SUQADD_scalar:
- suqadd(vf, rd, rn);
+ suqadd(vf, rd, rd, rn);
break;
case NEON_USQADD_scalar:
- usqadd(vf, rd, rn);
+ usqadd(vf, rd, rd, rn);
break;
default:
VIXL_UNIMPLEMENTED();
@@ -6943,7 +8970,7 @@ void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
int imm5 = instr->GetImmNEON5();
int tz = CountTrailingZeros(imm5, 32);
- int rn_index = imm5 >> (tz + 1);
+ int rn_index = ExtractSignedBitfield32(31, tz + 1, imm5);
dup_element(vf, rd, rn, rn_index);
} else {
VIXL_UNIMPLEMENTED();
@@ -7415,7 +9442,7 @@ void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
SimVRegister& zm = ReadVRegister(instr->GetRm());
Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
- LogicalOp logical_op;
+ LogicalOp logical_op = LogicalOpMask;
switch (op) {
case AND_z_zz:
logical_op = AND;
@@ -7430,7 +9457,6 @@ void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
logical_op = ORR;
break;
default:
- logical_op = LogicalOpMask;
VIXL_UNIMPLEMENTED();
break;
}
@@ -7492,46 +9518,78 @@ void Simulator::VisitSVEBitwiseShiftByVector_Predicated(
SimVRegister& zdn = ReadVRegister(instr->GetRd());
SimVRegister& zm = ReadVRegister(instr->GetRn());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
-
SimVRegister result;
- SimVRegister shiftand; // Vector to be shifted.
- SimVRegister shiftor; // Vector shift amount.
- Shift shift_op = ASR;
- mov(vform, shiftand, zdn);
- mov(vform, shiftor, zm);
+ // SVE uses the whole (saturated) lane for the shift amount.
+ bool shift_in_ls_byte = false;
- switch (instr->Mask(SVEBitwiseShiftByVector_PredicatedMask)) {
- case ASRR_z_p_zz:
- mov(vform, shiftand, zm);
- mov(vform, shiftor, zdn);
- VIXL_FALLTHROUGH();
- case ASR_z_p_zz:
+ switch (form_hash_) {
+ case "asrr_z_p_zz"_h:
+ sshr(vform, result, zm, zdn);
break;
- case LSLR_z_p_zz:
- mov(vform, shiftand, zm);
- mov(vform, shiftor, zdn);
- VIXL_FALLTHROUGH();
- case LSL_z_p_zz:
- shift_op = LSL;
+ case "asr_z_p_zz"_h:
+ sshr(vform, result, zdn, zm);
break;
- case LSRR_z_p_zz:
- mov(vform, shiftand, zm);
- mov(vform, shiftor, zdn);
- VIXL_FALLTHROUGH();
- case LSR_z_p_zz:
- shift_op = LSR;
+ case "lslr_z_p_zz"_h:
+ sshl(vform, result, zm, zdn, shift_in_ls_byte);
+ break;
+ case "lsl_z_p_zz"_h:
+ sshl(vform, result, zdn, zm, shift_in_ls_byte);
+ break;
+ case "lsrr_z_p_zz"_h:
+ ushr(vform, result, zm, zdn);
+ break;
+ case "lsr_z_p_zz"_h:
+ ushr(vform, result, zdn, zm);
+ break;
+ case "sqrshl_z_p_zz"_h:
+ sshl(vform, result, zdn, zm, shift_in_ls_byte)
+ .Round(vform)
+ .SignedSaturate(vform);
+ break;
+ case "sqrshlr_z_p_zz"_h:
+ sshl(vform, result, zm, zdn, shift_in_ls_byte)
+ .Round(vform)
+ .SignedSaturate(vform);
+ break;
+ case "sqshl_z_p_zz"_h:
+ sshl(vform, result, zdn, zm, shift_in_ls_byte).SignedSaturate(vform);
+ break;
+ case "sqshlr_z_p_zz"_h:
+ sshl(vform, result, zm, zdn, shift_in_ls_byte).SignedSaturate(vform);
+ break;
+ case "srshl_z_p_zz"_h:
+ sshl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
+ break;
+ case "srshlr_z_p_zz"_h:
+ sshl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
+ break;
+ case "uqrshl_z_p_zz"_h:
+ ushl(vform, result, zdn, zm, shift_in_ls_byte)
+ .Round(vform)
+ .UnsignedSaturate(vform);
+ break;
+ case "uqrshlr_z_p_zz"_h:
+ ushl(vform, result, zm, zdn, shift_in_ls_byte)
+ .Round(vform)
+ .UnsignedSaturate(vform);
+ break;
+ case "uqshl_z_p_zz"_h:
+ ushl(vform, result, zdn, zm, shift_in_ls_byte).UnsignedSaturate(vform);
+ break;
+ case "uqshlr_z_p_zz"_h:
+ ushl(vform, result, zm, zdn, shift_in_ls_byte).UnsignedSaturate(vform);
+ break;
+ case "urshl_z_p_zz"_h:
+ ushl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
+ break;
+ case "urshlr_z_p_zz"_h:
+ ushl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
break;
default:
VIXL_UNIMPLEMENTED();
break;
}
- SVEBitwiseShiftHelper(shift_op,
- vform,
- result,
- shiftand,
- shiftor,
- /* is_wide_elements = */ false);
mov_merging(vform, zdn, pg, result);
}
@@ -7571,7 +9629,7 @@ void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
SimVRegister& zd = ReadVRegister(instr->GetRd());
SimVRegister& zn = ReadVRegister(instr->GetRn());
- Shift shift_op;
+ Shift shift_op = NO_SHIFT;
switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
case ASR_z_zi:
case ASR_z_zw:
@@ -7586,7 +9644,6 @@ void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
shift_op = LSR;
break;
default:
- shift_op = NO_SHIFT;
VIXL_UNIMPLEMENTED();
break;
}
@@ -7846,6 +9903,8 @@ void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) {
SimVRegister& zm = ReadVRegister(instr->GetRn());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
case FADDA_v_p_z:
fadda(vform, vdn, pg, zm);
@@ -7862,8 +9921,9 @@ void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) {
SimVRegister& zm = ReadVRegister(instr->GetRn());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
- SimVRegister result;
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+ SimVRegister result;
switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
case FABD_z_p_zz:
fabd(vform, result, zdn, zm);
@@ -7968,6 +10028,8 @@ void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) {
SimVRegister& zd = ReadVRegister(instr->GetRd());
SimVRegister& zm = ReadVRegister(instr->GetRn());
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
case FTMAD_z_zzi:
ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16));
@@ -7984,6 +10046,8 @@ void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) {
SimVRegister& zn = ReadVRegister(instr->GetRn());
SimVRegister& zm = ReadVRegister(instr->GetRm());
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
case FADD_z_zz:
fadd(vform, zd, zn, zm);
@@ -8017,6 +10081,8 @@ void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) {
VectorFormat vform = instr->GetSVEVectorFormat();
SimVRegister result;
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
switch (instr->Mask(SVEFPCompareVectorsMask)) {
case FACGE_p_p_zz:
fabscmp(vform, result, zn, zm, ge);
@@ -8053,8 +10119,10 @@ void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) {
SimVRegister& zn = ReadVRegister(instr->GetRn());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
VectorFormat vform = instr->GetSVEVectorFormat();
- SimVRegister result;
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
+ SimVRegister result;
SimVRegister zeros;
dup_immediate(kFormatVnD, zeros, 0);
@@ -8184,6 +10252,8 @@ void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
uint64_t inactive_value = 0;
FastReduceFn fn = nullptr;
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
switch (instr->Mask(SVEFPFastReductionMask)) {
case FADDV_v_p_z:
fn = &Simulator::faddv;
@@ -8217,24 +10287,17 @@ void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
VectorFormat vform = kFormatUndefined;
- unsigned zm_code = instr->GetRm() & 0xf;
- unsigned index = instr->ExtractBits(20, 19);
switch (instr->Mask(SVEFPMulIndexMask)) {
case FMUL_z_zzi_d:
vform = kFormatVnD;
- index >>= 1; // Only bit 20 is the index for D lanes.
break;
case FMUL_z_zzi_h_i3h:
- index += 4; // Bit 22 (i3h) is the top bit of index.
- VIXL_FALLTHROUGH();
case FMUL_z_zzi_h:
vform = kFormatVnH;
- zm_code &= 7; // Three bits used for zm.
break;
case FMUL_z_zzi_s:
vform = kFormatVnS;
- zm_code &= 7; // Three bits used for zm.
break;
default:
VIXL_UNIMPLEMENTED();
@@ -8245,17 +10308,18 @@ void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
SimVRegister& zn = ReadVRegister(instr->GetRn());
SimVRegister temp;
- dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index);
+ dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
fmul(vform, zd, zn, temp);
}
void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
VectorFormat vform = instr->GetSVEVectorFormat();
-
SimVRegister& zd = ReadVRegister(instr->GetRd());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
SimVRegister result;
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
if (instr->ExtractBit(15) == 0) {
// Floating-point multiply-accumulate writing addend.
SimVRegister& zm = ReadVRegister(instr->GetRm());
@@ -8319,30 +10383,21 @@ void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
VectorFormat vform = kFormatUndefined;
- unsigned zm_code = 0xffffffff;
- unsigned index = 0xffffffff;
switch (instr->Mask(SVEFPMulAddIndexMask)) {
case FMLA_z_zzzi_d:
case FMLS_z_zzzi_d:
vform = kFormatVnD;
- zm_code = instr->GetRmLow16();
- // Only bit 20 is the index for D lanes.
- index = instr->ExtractBit(20);
break;
case FMLA_z_zzzi_s:
case FMLS_z_zzzi_s:
vform = kFormatVnS;
- zm_code = instr->GetRm() & 0x7; // Three bits used for zm.
- index = instr->ExtractBits(20, 19);
break;
case FMLA_z_zzzi_h:
case FMLS_z_zzzi_h:
case FMLA_z_zzzi_h_i3h:
case FMLS_z_zzzi_h_i3h:
vform = kFormatVnH;
- zm_code = instr->GetRm() & 0x7; // Three bits used for zm.
- index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
break;
default:
VIXL_UNIMPLEMENTED();
@@ -8353,7 +10408,7 @@ void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
SimVRegister& zn = ReadVRegister(instr->GetRn());
SimVRegister temp;
- dup_elements_to_segments(vform, temp, ReadVRegister(zm_code), index);
+ dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
if (instr->ExtractBit(10) == 1) {
fmls(vform, zd, zd, zn, temp);
} else {
@@ -8425,44 +10480,40 @@ void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) {
SimVRegister& zd = ReadVRegister(instr->GetRd());
SimVRegister& zn = ReadVRegister(instr->GetRn());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
- int dst_data_size;
- int src_data_size;
+ VectorFormat dst_data_size = kFormatUndefined;
+ VectorFormat src_data_size = kFormatUndefined;
switch (instr->Mask(SVEFPConvertPrecisionMask)) {
case FCVT_z_p_z_d2h:
- dst_data_size = kHRegSize;
- src_data_size = kDRegSize;
+ dst_data_size = kFormatVnH;
+ src_data_size = kFormatVnD;
break;
case FCVT_z_p_z_d2s:
- dst_data_size = kSRegSize;
- src_data_size = kDRegSize;
+ dst_data_size = kFormatVnS;
+ src_data_size = kFormatVnD;
break;
case FCVT_z_p_z_h2d:
- dst_data_size = kDRegSize;
- src_data_size = kHRegSize;
+ dst_data_size = kFormatVnD;
+ src_data_size = kFormatVnH;
break;
case FCVT_z_p_z_h2s:
- dst_data_size = kSRegSize;
- src_data_size = kHRegSize;
+ dst_data_size = kFormatVnS;
+ src_data_size = kFormatVnH;
break;
case FCVT_z_p_z_s2d:
- dst_data_size = kDRegSize;
- src_data_size = kSRegSize;
+ dst_data_size = kFormatVnD;
+ src_data_size = kFormatVnS;
break;
case FCVT_z_p_z_s2h:
- dst_data_size = kHRegSize;
- src_data_size = kSRegSize;
+ dst_data_size = kFormatVnH;
+ src_data_size = kFormatVnS;
break;
default:
VIXL_UNIMPLEMENTED();
- dst_data_size = 0;
- src_data_size = 0;
break;
}
- VectorFormat vform =
- SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
- fcvt(vform, dst_data_size, src_data_size, zd, pg, zn);
+ fcvt(dst_data_size, src_data_size, zd, pg, zn);
}
void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) {
@@ -8494,6 +10545,8 @@ void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) {
FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
bool exact_exception = false;
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
case FRINTA_z_p_z:
fpcr_rounding = FPTieAway;
@@ -8592,6 +10645,8 @@ void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) {
SimVRegister& zn = ReadVRegister(instr->GetRn());
FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
case FRECPE_z_z:
frecpe(vform, zd, zn, fpcr_rounding);
@@ -8973,33 +11028,52 @@ void Simulator::VisitSVEIntCompareScalarCountAndLimit(
int64_t ssrc2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
uint64_t usrc2 = ssrc2 & mask;
+ bool reverse = (form_hash_ == "whilege_p_p_rr"_h) ||
+ (form_hash_ == "whilegt_p_p_rr"_h) ||
+ (form_hash_ == "whilehi_p_p_rr"_h) ||
+ (form_hash_ == "whilehs_p_p_rr"_h);
+
+ int lane_count = LaneCountFromFormat(vform);
bool last = true;
- for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
+ for (int i = 0; i < lane_count; i++) {
usrc1 &= mask;
int64_t ssrc1 = ExtractSignedBitfield64(rsize - 1, 0, usrc1);
bool cond = false;
- switch (instr->Mask(SVEIntCompareScalarCountAndLimitMask)) {
- case WHILELE_p_p_rr:
+ switch (form_hash_) {
+ case "whilele_p_p_rr"_h:
cond = ssrc1 <= ssrc2;
break;
- case WHILELO_p_p_rr:
+ case "whilelo_p_p_rr"_h:
cond = usrc1 < usrc2;
break;
- case WHILELS_p_p_rr:
+ case "whilels_p_p_rr"_h:
cond = usrc1 <= usrc2;
break;
- case WHILELT_p_p_rr:
+ case "whilelt_p_p_rr"_h:
cond = ssrc1 < ssrc2;
break;
+ case "whilege_p_p_rr"_h:
+ cond = ssrc1 >= ssrc2;
+ break;
+ case "whilegt_p_p_rr"_h:
+ cond = ssrc1 > ssrc2;
+ break;
+ case "whilehi_p_p_rr"_h:
+ cond = usrc1 > usrc2;
+ break;
+ case "whilehs_p_p_rr"_h:
+ cond = usrc1 >= usrc2;
+ break;
default:
VIXL_UNIMPLEMENTED();
break;
}
last = last && cond;
LogicPRegister dst(pd);
+ int lane = reverse ? ((lane_count - 1) - i) : i;
dst.SetActive(vform, lane, last);
- usrc1++;
+ usrc1 += reverse ? -1 : 1;
}
PredTest(vform, GetPTrue(), pd);
@@ -9013,7 +11087,7 @@ void Simulator::VisitSVEConditionallyTerminateScalars(
bool is_64_bit = instr->ExtractBit(22) == 1;
uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
- bool term;
+ bool term = false;
switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
case CTERMEQ_rr:
term = src1 == src2;
@@ -9022,7 +11096,6 @@ void Simulator::VisitSVEConditionallyTerminateScalars(
term = src1 != src2;
break;
default:
- term = false;
VIXL_UNIMPLEMENTED();
break;
}
@@ -9033,7 +11106,7 @@ void Simulator::VisitSVEConditionallyTerminateScalars(
void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
bool commute_inputs = false;
- Condition cond;
+ Condition cond = al;
switch (instr->Mask(SVEIntCompareSignedImmMask)) {
case CMPEQ_p_p_zi:
cond = eq;
@@ -9056,7 +11129,6 @@ void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
cond = ne;
break;
default:
- cond = al;
VIXL_UNIMPLEMENTED();
break;
}
@@ -9078,7 +11150,7 @@ void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
bool commute_inputs = false;
- Condition cond;
+ Condition cond = al;
switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
case CMPHI_p_p_zi:
cond = hi;
@@ -9095,7 +11167,6 @@ void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
commute_inputs = true;
break;
default:
- cond = al;
VIXL_UNIMPLEMENTED();
break;
}
@@ -9229,8 +11300,6 @@ void Simulator::VisitSVEConstructivePrefix_Unpredicated(
switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
case MOVPRFX_z_z:
mov(kFormatVnD, zd, zn); // The lane size is arbitrary.
- // Record the movprfx, so the next ExecuteInstruction() can check it.
- movprfx_ = instr;
break;
default:
VIXL_UNIMPLEMENTED();
@@ -9274,13 +11343,16 @@ void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) {
SimVRegister& zn = ReadVRegister(instr->GetRn());
SimVRegister& zm = ReadVRegister(instr->GetRm());
- switch (instr->Mask(SVEIntMulAddUnpredicatedMask)) {
- case SDOT_z_zzz:
+ switch (form_hash_) {
+ case "sdot_z_zzz"_h:
sdot(vform, zda, zn, zm);
break;
- case UDOT_z_zzz:
+ case "udot_z_zzz"_h:
udot(vform, zda, zn, zm);
break;
+ case "usdot_z_zzz_s"_h:
+ usdot(vform, zda, zn, zm);
+ break;
default:
VIXL_UNIMPLEMENTED();
break;
@@ -9300,9 +11372,6 @@ void Simulator::VisitSVEMovprfx(const Instruction* instr) {
} else {
mov_zeroing(vform, zd, pg, zn);
}
-
- // Record the movprfx, so the next ExecuteInstruction() can check it.
- movprfx_ = instr;
break;
default:
VIXL_UNIMPLEMENTED();
@@ -9419,6 +11488,8 @@ void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) {
SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
SimVRegister& zd = ReadVRegister(instr->GetRd());
+ if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
+
SimVRegister result;
switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
case FCPY_z_p_i: {
@@ -10116,69 +12187,59 @@ void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
/* is_signed = */ false);
}
-void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusImm(
+void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm(
const Instruction* instr) {
SimVRegister& zt = ReadVRegister(instr->GetRt());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ uint64_t dwords = 2;
+ VectorFormat vform_dst = kFormatVnQ;
+ if ((form_hash_ == "ld1rob_z_p_bi_u8"_h) ||
+ (form_hash_ == "ld1roh_z_p_bi_u16"_h) ||
+ (form_hash_ == "ld1row_z_p_bi_u32"_h) ||
+ (form_hash_ == "ld1rod_z_p_bi_u64"_h)) {
+ dwords = 4;
+ vform_dst = kFormatVnO;
+ }
+
uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
- uint64_t offset = instr->ExtractSignedBits(19, 16) * 16;
+ uint64_t offset =
+ instr->ExtractSignedBits(19, 16) * dwords * kDRegSizeInBytes;
+ int msz = instr->ExtractBits(24, 23);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
- VectorFormat vform = kFormatUndefined;
- switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusImmMask)) {
- case LD1RQB_z_p_bi_u8:
- vform = kFormatVnB;
- break;
- case LD1RQD_z_p_bi_u64:
- vform = kFormatVnD;
- break;
- case LD1RQH_z_p_bi_u16:
- vform = kFormatVnH;
- break;
- case LD1RQW_z_p_bi_u32:
- vform = kFormatVnS;
- break;
- default:
- addr = offset = 0;
- break;
+ for (unsigned i = 0; i < dwords; i++) {
+ ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes));
}
- ld1(kFormat16B, zt, addr + offset);
mov_zeroing(vform, zt, pg, zt);
- dup_element(kFormatVnQ, zt, zt, 0);
+ dup_element(vform_dst, zt, zt, 0);
}
-void Simulator::VisitSVELoadAndBroadcastQuadword_ScalarPlusScalar(
+void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar(
const Instruction* instr) {
SimVRegister& zt = ReadVRegister(instr->GetRt());
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
+ uint64_t bytes = 16;
+ VectorFormat vform_dst = kFormatVnQ;
+ if ((form_hash_ == "ld1rob_z_p_br_contiguous"_h) ||
+ (form_hash_ == "ld1roh_z_p_br_contiguous"_h) ||
+ (form_hash_ == "ld1row_z_p_br_contiguous"_h) ||
+ (form_hash_ == "ld1rod_z_p_br_contiguous"_h)) {
+ bytes = 32;
+ vform_dst = kFormatVnO;
+ }
+
uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
uint64_t offset = ReadXRegister(instr->GetRm());
-
- VectorFormat vform = kFormatUndefined;
- switch (instr->Mask(SVELoadAndBroadcastQuadword_ScalarPlusScalarMask)) {
- case LD1RQB_z_p_br_contiguous:
- vform = kFormatVnB;
- break;
- case LD1RQD_z_p_br_contiguous:
- vform = kFormatVnD;
- offset <<= 3;
- break;
- case LD1RQH_z_p_br_contiguous:
- vform = kFormatVnH;
- offset <<= 1;
- break;
- case LD1RQW_z_p_br_contiguous:
- vform = kFormatVnS;
- offset <<= 2;
- break;
- default:
- addr = offset = 0;
- break;
+ int msz = instr->ExtractBits(24, 23);
+ VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
+ offset <<= msz;
+ for (unsigned i = 0; i < bytes; i++) {
+ ld1(kFormatVnB, zt, i, addr + offset + i);
}
- ld1(kFormat16B, zt, addr + offset);
mov_zeroing(vform, zt, pg, zt);
- dup_element(kFormatVnQ, zt, zt, 0);
+ dup_element(vform_dst, zt, zt, 0);
}
void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm(
@@ -10726,35 +12787,78 @@ void Simulator::VisitSVEMulIndex(const Instruction* instr) {
VectorFormat vform = instr->GetSVEVectorFormat();
SimVRegister& zda = ReadVRegister(instr->GetRd());
SimVRegister& zn = ReadVRegister(instr->GetRn());
+ std::pair<int, int> zm_and_index = instr->GetSVEMulZmAndIndex();
+ SimVRegister zm = ReadVRegister(zm_and_index.first);
+ int index = zm_and_index.second;
+
+ SimVRegister temp;
+ dup_elements_to_segments(vform, temp, zm, index);
+
+ switch (form_hash_) {
+ case "sdot_z_zzzi_d"_h:
+ case "sdot_z_zzzi_s"_h:
+ sdot(vform, zda, zn, temp);
+ break;
+ case "udot_z_zzzi_d"_h:
+ case "udot_z_zzzi_s"_h:
+ udot(vform, zda, zn, temp);
+ break;
+ case "sudot_z_zzzi_s"_h:
+ usdot(vform, zda, temp, zn);
+ break;
+ case "usdot_z_zzzi_s"_h:
+ usdot(vform, zda, zn, temp);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
- switch (instr->Mask(SVEMulIndexMask)) {
- case SDOT_z_zzzi_d:
- sdot(vform,
- zda,
- zn,
- ReadVRegister(instr->ExtractBits(19, 16)),
- instr->ExtractBit(20));
+void Simulator::SimulateMatrixMul(const Instruction* instr) {
+ VectorFormat vform = kFormatVnS;
+ SimVRegister& dn = ReadVRegister(instr->GetRd());
+ SimVRegister& n = ReadVRegister(instr->GetRn());
+ SimVRegister& m = ReadVRegister(instr->GetRm());
+
+ bool n_signed = false;
+ bool m_signed = false;
+ switch (form_hash_) {
+ case "smmla_asimdsame2_g"_h:
+ vform = kFormat4S;
+ VIXL_FALLTHROUGH();
+ case "smmla_z_zzz"_h:
+ n_signed = m_signed = true;
break;
- case SDOT_z_zzzi_s:
- sdot(vform,
- zda,
- zn,
- ReadVRegister(instr->ExtractBits(18, 16)),
- instr->ExtractBits(20, 19));
+ case "ummla_asimdsame2_g"_h:
+ vform = kFormat4S;
+ VIXL_FALLTHROUGH();
+ case "ummla_z_zzz"_h:
+ // Nothing to do.
break;
- case UDOT_z_zzzi_d:
- udot(vform,
- zda,
- zn,
- ReadVRegister(instr->ExtractBits(19, 16)),
- instr->ExtractBit(20));
+ case "usmmla_asimdsame2_g"_h:
+ vform = kFormat4S;
+ VIXL_FALLTHROUGH();
+ case "usmmla_z_zzz"_h:
+ m_signed = true;
break;
- case UDOT_z_zzzi_s:
- udot(vform,
- zda,
- zn,
- ReadVRegister(instr->ExtractBits(18, 16)),
- instr->ExtractBits(20, 19));
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+ matmul(vform, dn, n, m, n_signed, m_signed);
+}
+
+void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
+ SimVRegister& zdn = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ switch (form_hash_) {
+ case "fmmla_z_zzz_s"_h:
+ case "fmmla_z_zzz_d"_h:
+ fmatmul(vform, zdn, zn, zm);
break;
default:
VIXL_UNIMPLEMENTED();
@@ -10896,9 +13000,7 @@ void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) {
// Second source register "Zm" is encoded where "Zn" would usually be.
SimVRegister& zm = ReadVRegister(instr->GetRn());
- const int imm8h_mask = 0x001F0000;
- const int imm8l_mask = 0x00001C00;
- int index = instr->ExtractBits<imm8h_mask | imm8l_mask>();
+ int index = instr->GetSVEExtractImmediate();
int vl = GetVectorLengthInBytes();
index = (index >= vl) ? 0 : index;
@@ -11199,15 +13301,19 @@ void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
mov_merging(chunk_form, zd, pg, result);
}
-void Simulator::VisitSVEVectorSplice_Destructive(const Instruction* instr) {
+void Simulator::VisitSVEVectorSplice(const Instruction* instr) {
VectorFormat vform = instr->GetSVEVectorFormat();
- SimVRegister& zdn = ReadVRegister(instr->GetRd());
- SimVRegister& zm = ReadVRegister(instr->GetRn());
+ SimVRegister& zd = ReadVRegister(instr->GetRd());
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
- switch (instr->Mask(SVEVectorSplice_DestructiveMask)) {
- case SPLICE_z_p_zz_des:
- splice(vform, zdn, pg, zdn, zm);
+ switch (form_hash_) {
+ case "splice_z_p_zz_des"_h:
+ splice(vform, zd, pg, zd, zn);
+ break;
+ case "splice_z_p_zz_con"_h:
+ splice(vform, zd, pg, zn, zn2);
break;
default:
VIXL_UNIMPLEMENTED();
@@ -11315,15 +13421,24 @@ void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) {
}
void Simulator::VisitSVETableLookup(const Instruction* instr) {
+ VectorFormat vform = instr->GetSVEVectorFormat();
SimVRegister& zd = ReadVRegister(instr->GetRd());
- switch (instr->Mask(SVETableLookupMask)) {
- case TBL_z_zz_1:
- Table(instr->GetSVEVectorFormat(),
- zd,
- ReadVRegister(instr->GetRn()),
- ReadVRegister(instr->GetRm()));
- return;
+ SimVRegister& zn = ReadVRegister(instr->GetRn());
+ SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
+ SimVRegister& zm = ReadVRegister(instr->GetRm());
+
+ switch (form_hash_) {
+ case "tbl_z_zz_1"_h:
+ tbl(vform, zd, zn, zm);
+ break;
+ case "tbl_z_zz_2"_h:
+ tbl(vform, zd, zn, zn2, zm);
+ break;
+ case "tbx_z_zz"_h:
+ tbx(vform, zd, zn, zm);
+ break;
default:
+ VIXL_UNIMPLEMENTED();
break;
}
}
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 44fb0cdb..1fdbb6f6 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -28,6 +28,7 @@
#define VIXL_AARCH64_SIMULATOR_AARCH64_H_
#include <memory>
+#include <unordered_map>
#include <vector>
#include "../globals-vixl.h"
@@ -555,6 +556,13 @@ class LogicVRegister {
return element;
}
+ int UintArray(VectorFormat vform, uint64_t* dst) const {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ dst[i] = Uint(vform, i);
+ }
+ return LaneCountFromFormat(vform);
+ }
+
uint64_t UintLeftJustified(VectorFormat vform, int index) const {
return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
}
@@ -638,6 +646,8 @@ class LogicVRegister {
register_.Insert(index, value);
}
+ void Clear() { register_.Clear(); }
+
// When setting a result in a register larger than the result itself, the top
// bits of the register must be cleared.
void ClearForWrite(VectorFormat vform) const {
@@ -1131,11 +1141,6 @@ class Simulator : public DecoderVisitor {
VIXL_ASSERT(IsWordAligned(pc_));
pc_modified_ = false;
- if (movprfx_ != NULL) {
- VIXL_CHECK(pc_->CanTakeSVEMovprfx(movprfx_));
- movprfx_ = NULL;
- }
-
// On guarded pages, if BType is not zero, take an exception on any
// instruction other than BTI, PACI[AB]SP, HLT or BRK.
if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
@@ -1150,6 +1155,9 @@ class Simulator : public DecoderVisitor {
}
}
+ bool last_instr_was_movprfx =
+ (form_hash_ == "movprfx_z_z"_h) || (form_hash_ == "movprfx_z_p_z"_h);
+
// decoder_->Decode(...) triggers at least the following visitors:
// 1. The CPUFeaturesAuditor (`cpu_features_auditor_`).
// 2. The PrintDisassembler (`print_disasm_`), if enabled.
@@ -1157,6 +1165,13 @@ class Simulator : public DecoderVisitor {
// User can add additional visitors at any point, but the Simulator requires
// that the ordering above is preserved.
decoder_->Decode(pc_);
+
+ if (last_instr_was_movprfx) {
+ VIXL_ASSERT(last_instr_ != NULL);
+ VIXL_CHECK(pc_->CanTakeSVEMovprfx(form_hash_, last_instr_));
+ }
+
+ last_instr_ = ReadPc();
IncrementPc();
LogAllWrittenRegisters();
UpdateBType();
@@ -1164,18 +1179,75 @@ class Simulator : public DecoderVisitor {
VIXL_CHECK(cpu_features_auditor_.InstructionIsAvailable());
}
-// Declare all Visitor functions.
-#define DECLARE(A) \
- virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
+ virtual void Visit(Metadata* metadata,
+ const Instruction* instr) VIXL_OVERRIDE;
+
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr);
VISITOR_LIST_THAT_RETURN(DECLARE)
#undef DECLARE
-
-
#define DECLARE(A) \
- VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr) VIXL_OVERRIDE;
+ VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr);
VISITOR_LIST_THAT_DONT_RETURN(DECLARE)
#undef DECLARE
+ void Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
+ void Simulate_PdT_Xn_Xm(const Instruction* instr);
+ void Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
+ void Simulate_ZdB_ZnB_ZmB(const Instruction* instr);
+ void Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr);
+ void Simulate_ZdH_PgM_ZnS(const Instruction* instr);
+ void Simulate_ZdH_ZnH_ZmH_imm(const Instruction* instr);
+ void Simulate_ZdS_PgM_ZnD(const Instruction* instr);
+ void Simulate_ZdS_PgM_ZnS(const Instruction* instr);
+ void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr);
+ void Simulate_ZdT_PgM_ZnT(const Instruction* instr);
+ void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
+ void Simulate_ZdT_ZnT_ZmT(const Instruction* instr);
+ void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr);
+ void Simulate_ZdT_ZnT_const(const Instruction* instr);
+ void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
+ void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
+ void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr);
+ void Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
+ void Simulate_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
+ void Simulate_ZdaT_PgM_ZnTb(const Instruction* instr);
+ void Simulate_ZdaT_ZnT_ZmT(const Instruction* instr);
+ void Simulate_ZdaT_ZnT_const(const Instruction* instr);
+ void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr);
+ void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
+ void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr);
+ void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
+ void Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
+ void Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr);
+ void Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
+ void Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr);
+
+ void SimulateSVEHalvingAddSub(const Instruction* instr);
+ void SimulateSVESaturatingArithmetic(const Instruction* instr);
+ void SimulateSVEIntArithPair(const Instruction* instr);
+ void SimulateSVENarrow(const Instruction* instr);
+ void SimulateSVEInterleavedArithLong(const Instruction* instr);
+ void SimulateSVEShiftLeftImm(const Instruction* instr);
+ void SimulateSVEAddSubCarry(const Instruction* instr);
+ void SimulateSVEAddSubHigh(const Instruction* instr);
+ void SimulateSVEIntMulLongVec(const Instruction* instr);
+ void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr);
+ void SimulateSVEExclusiveOrRotate(const Instruction* instr);
+ void SimulateSVEBitwiseTernary(const Instruction* instr);
+ void SimulateSVEComplexDotProduct(const Instruction* instr);
+ void SimulateSVEMulIndex(const Instruction* instr);
+ void SimulateSVEMlaMlsIndex(const Instruction* instr);
+ void SimulateSVEComplexIntMulAdd(const Instruction* instr);
+ void SimulateSVESaturatingMulAddHigh(const Instruction* instr);
+ void SimulateSVESaturatingMulHighIndex(const Instruction* instr);
+ void SimulateSVEFPConvertLong(const Instruction* instr);
+ void SimulateMatrixMul(const Instruction* instr);
+ void SimulateSVEFPMatrixMul(const Instruction* instr);
+ void SimulateNEONMulByElementLong(const Instruction* instr);
+ void SimulateNEONFPMulByElement(const Instruction* instr);
+ void SimulateNEONFPMulByElementLong(const Instruction* instr);
+ void SimulateNEONComplexMulByElement(const Instruction* instr);
+ void SimulateNEONDotProdByElement(const Instruction* instr);
// Integer register accessors.
@@ -2790,6 +2862,14 @@ class Simulator : public DecoderVisitor {
uint64_t left,
uint64_t right,
int carry_in = 0);
+ std::pair<uint64_t, uint8_t> AddWithCarry(unsigned reg_size,
+ uint64_t left,
+ uint64_t right,
+ int carry_in);
+ using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
+ vixl_uint128_t Add128(vixl_uint128_t x, vixl_uint128_t y);
+ vixl_uint128_t Mul64(uint64_t x, uint64_t y);
+ vixl_uint128_t Neg128(vixl_uint128_t x);
void LogicalHelper(const Instruction* instr, int64_t op2);
void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
void LoadStoreHelper(const Instruction* instr,
@@ -2834,7 +2914,9 @@ class Simulator : public DecoderVisitor {
int64_t value,
Extend extend_type,
unsigned left_shift = 0) const;
- uint16_t PolynomialMult(uint8_t op1, uint8_t op2) const;
+ uint64_t PolynomialMult(uint64_t op1,
+ uint64_t op2,
+ int lane_size_in_bits) const;
void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
@@ -3065,66 +3147,6 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
- LogicVRegister smull(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister smull2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister umull(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister umull2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister smlal(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister smlal2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister umlal(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister umlal2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister smlsl(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister smlsl2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister umlsl(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
- LogicVRegister umlsl2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
LogicVRegister umulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -3134,31 +3156,16 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
- LogicVRegister sqdmull2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
LogicVRegister sqdmlal(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
- LogicVRegister sqdmlal2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
LogicVRegister sqdmlsl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
- LogicVRegister sqdmlsl2(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
LogicVRegister sqdmulh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -3169,21 +3176,11 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
- LogicVRegister sdot(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
LogicVRegister sqrdmlah(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
- LogicVRegister udot(VectorFormat vform,
- LogicVRegister dst,
- const LogicVRegister& src1,
- const LogicVRegister& src2,
- int index);
LogicVRegister sqrdmlsh(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -3233,6 +3230,7 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src2);
LogicVRegister bsl(VectorFormat vform,
LogicVRegister dst,
+ const LogicVRegister& src_mask,
const LogicVRegister& src1,
const LogicVRegister& src2);
LogicVRegister cls(VectorFormat vform,
@@ -3286,11 +3284,19 @@ class Simulator : public DecoderVisitor {
LogicVRegister uadalp(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
+ LogicVRegister ror(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rotation);
LogicVRegister ext(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
int index);
+ LogicVRegister rotate_elements_right(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int index);
template <typename T>
LogicVRegister fcadd(VectorFormat vform,
LogicVRegister dst,
@@ -3331,6 +3337,40 @@ class Simulator : public DecoderVisitor {
LogicVRegister acc,
const LogicPRegister& pg,
const LogicVRegister& src);
+ LogicVRegister cadd(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot,
+ bool saturate = false);
+ LogicVRegister cmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot);
+ LogicVRegister cmla(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index,
+ int rot);
+ LogicVRegister bgrp(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool do_bext = false);
+ LogicVRegister bdep(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister histogram(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicPRegister& pg,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool do_segmented = false);
LogicVRegister index(VectorFormat vform,
LogicVRegister dst,
uint64_t start,
@@ -3353,6 +3393,10 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src,
int src_index);
+ LogicVRegister dup_elements_to_segments(
+ VectorFormat vform,
+ LogicVRegister dst,
+ const std::pair<int, int>& src_and_index);
LogicVRegister dup_immediate(VectorFormat vform,
LogicVRegister dst,
uint64_t imm);
@@ -3368,6 +3412,10 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const SimPRegister& pg,
const LogicVRegister& src);
+ LogicVRegister mov_alternating(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int start_at);
LogicPRegister mov_merging(LogicPRegister dst,
const LogicPRegister& pg,
const LogicPRegister& src);
@@ -3383,10 +3431,20 @@ class Simulator : public DecoderVisitor {
LogicVRegister sshl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
- const LogicVRegister& src2);
+ const LogicVRegister& src2,
+ bool shift_is_8bit = true);
LogicVRegister ushl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool shift_is_8bit = true);
+ LogicVRegister sshr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister ushr(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
const LogicVRegister& src2);
// Perform a "conditional last" operation. The first part of the pair is true
// if any predicate lane is active, false otherwise. The second part takes the
@@ -3396,6 +3454,11 @@ class Simulator : public DecoderVisitor {
const LogicPRegister& pg,
const LogicVRegister& src2,
int offset_from_last_active);
+ LogicPRegister match(VectorFormat vform,
+ LogicPRegister dst,
+ const LogicVRegister& haystack,
+ const LogicVRegister& needles,
+ bool negate_match);
LogicVRegister compact(VectorFormat vform,
LogicVRegister dst,
const LogicPRegister& pg,
@@ -3465,13 +3528,15 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src);
LogicVRegister uxtl(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src);
+ const LogicVRegister& src,
+ bool is_2 = false);
LogicVRegister uxtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
LogicVRegister sxtl(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src);
+ const LogicVRegister& src,
+ bool is_2 = false);
LogicVRegister sxtl2(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
@@ -3507,10 +3572,6 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& ind);
LogicVRegister Table(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src,
- const LogicVRegister& tab);
- LogicVRegister Table(VectorFormat vform,
- LogicVRegister dst,
const LogicVRegister& ind,
bool zero_out_of_bounds,
const LogicVRegister* tab1,
@@ -3750,10 +3811,12 @@ class Simulator : public DecoderVisitor {
int shift);
LogicVRegister suqadd(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src);
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
LogicVRegister usqadd(VectorFormat vform,
LogicVRegister dst,
- const LogicVRegister& src);
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
LogicVRegister sqshl(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
@@ -3875,7 +3938,8 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
- bool is_signed);
+ bool is_src1_signed,
+ bool is_src2_signed);
LogicVRegister sdot(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -3884,12 +3948,41 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ LogicVRegister usdot(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister cdot(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& acc,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot);
+ LogicVRegister sqrdcmlah(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int rot);
+ LogicVRegister sqrdcmlah(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& srca,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index,
+ int rot);
LogicVRegister sqrdmlash(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2,
bool round = true,
bool sub_op = false);
+ LogicVRegister sqrdmlash_d(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool round = true,
+ bool sub_op = false);
LogicVRegister sqrdmlah(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -3904,6 +3997,21 @@ class Simulator : public DecoderVisitor {
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+ LogicVRegister matmul(VectorFormat vform_dst,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool src1_signed,
+ bool src2_signed);
+ template <typename T>
+ LogicVRegister fmatmul(VectorFormat vform,
+ LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister fmatmul(VectorFormat vform,
+ LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
#define NEON_3VREG_LOGIC_LIST(V) \
V(addhn) \
V(addhn2) \
@@ -3923,23 +4031,14 @@ class Simulator : public DecoderVisitor {
V(sabdl2) \
V(uabdl) \
V(uabdl2) \
- V(smull) \
V(smull2) \
- V(umull) \
V(umull2) \
- V(smlal) \
V(smlal2) \
- V(umlal) \
V(umlal2) \
- V(smlsl) \
V(smlsl2) \
- V(umlsl) \
V(umlsl2) \
- V(sqdmlal) \
V(sqdmlal2) \
- V(sqdmlsl) \
V(sqdmlsl2) \
- V(sqdmull) \
V(sqdmull2)
#define DEFINE_LOGIC_FUNC(FXN) \
@@ -3950,6 +4049,26 @@ class Simulator : public DecoderVisitor {
NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
#undef DEFINE_LOGIC_FUNC
+#define NEON_MULL_LIST(V) \
+ V(smull) \
+ V(umull) \
+ V(smlal) \
+ V(umlal) \
+ V(smlsl) \
+ V(umlsl) \
+ V(sqdmlal) \
+ V(sqdmlsl) \
+ V(sqdmull)
+
+#define DECLARE_NEON_MULL_OP(FN) \
+ LogicVRegister FN(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2, \
+ bool is_2 = false);
+ NEON_MULL_LIST(DECLARE_NEON_MULL_OP)
+#undef DECLARE_NEON_MULL_OP
+
#define NEON_FP3SAME_LIST(V) \
V(fadd, FPAdd, false) \
V(fsub, FPSub, true) \
@@ -4111,6 +4230,9 @@ class Simulator : public DecoderVisitor {
LogicVRegister fexpa(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src);
+ LogicVRegister flogb(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
template <typename T>
LogicVRegister fscale(VectorFormat vform,
LogicVRegister dst,
@@ -4137,9 +4259,8 @@ class Simulator : public DecoderVisitor {
FPRounding rounding_mode,
bool inexact_exception = false,
FrintMode frint_mode = kFrintToInteger);
- LogicVRegister fcvt(VectorFormat vform,
- unsigned dst_data_size_in_bits,
- unsigned src_data_size_in_bits,
+ LogicVRegister fcvt(VectorFormat dst_vform,
+ VectorFormat src_vform,
LogicVRegister dst,
const LogicPRegister& pg,
const LogicVRegister& src);
@@ -4256,6 +4377,10 @@ class Simulator : public DecoderVisitor {
const LogicPRegister& pg,
const LogicVRegister& src);
+ LogicVRegister interleave_top_bottom(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
template <typename T>
struct TFPPairOp {
typedef T (Simulator::*type)(T a, T b);
@@ -4357,6 +4482,9 @@ class Simulator : public DecoderVisitor {
T FPMinNM(T a, T b);
template <typename T>
+ T FPMulNaNs(T op1, T op2);
+
+ template <typename T>
T FPMul(T op1, T op2);
template <typename T>
@@ -4491,6 +4619,27 @@ class Simulator : public DecoderVisitor {
const LogicVRegister& src2,
bool is_wide_elements);
+ // Pack all even- or odd-numbered elements of source vector side by side and
+ // place in elements of lower half the destination vector, and leave the upper
+ // half all zero.
+ // [...| H | G | F | E | D | C | B | A ]
+ // => [...................| G | E | C | A ]
+ LogicVRegister pack_even_elements(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
+ // [...| H | G | F | E | D | C | B | A ]
+ // => [...................| H | F | D | B ]
+ LogicVRegister pack_odd_elements(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src);
+
+ LogicVRegister adcl(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool top);
+
template <typename T>
LogicVRegister FTMaddHelper(VectorFormat vform,
LogicVRegister dst,
@@ -4587,9 +4736,9 @@ class Simulator : public DecoderVisitor {
bool pc_modified_;
const Instruction* pc_;
- // If non-NULL, the last instruction was a movprfx, and validity needs to be
- // checked.
- Instruction const* movprfx_;
+ // Pointer to the last simulated instruction, used for checking the validity
+ // of the current instruction with movprfx.
+ Instruction const* last_instr_;
// Branch type register, used for branch target identification.
BType btype_;
@@ -4613,6 +4762,13 @@ class Simulator : public DecoderVisitor {
static const char* preg_names[];
private:
+ using FormToVisitorFnMap =
+ std::unordered_map<uint32_t,
+ std::function<void(Simulator*, const Instruction*)>>;
+ static const FormToVisitorFnMap* GetFormToVisitorFnMap();
+
+ uint32_t form_hash_;
+
static const PACKey kPACKeyIA;
static const PACKey kPACKeyIB;
static const PACKey kPACKeyDA;
diff --git a/src/cpu-features.h b/src/cpu-features.h
index 1b0f2c24..ebd05787 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -170,7 +170,20 @@ namespace vixl {
V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \
V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \
V(kPAuthFPAC, "PAuth FPAC", NULL) \
- V(kPAuthFPACCombined, "PAuth FPACCombined", NULL)
+ V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) \
+ /* Scalable Vector Extension 2. */ \
+ V(kSVE2, "SVE2", "sve2") \
+ V(kSVESM4, "SVE SM4", "svesm4") \
+ V(kSVESHA3, "SVE SHA3", "svesha3") \
+ V(kSVEBitPerm, "SVE BitPerm", "svebitperm") \
+ V(kSVEAES, "SVE AES", "sveaes") \
+ V(kSVEPmull128, "SVE Pmull128", "svepmull") \
+ /* Alternate floating-point behavior */ \
+ V(kAFP, "AFP", "afp") \
+ /* Enhanced Counter Virtualization */ \
+ V(kECV, "ECV", "ecv") \
+ /* Increased precision of Reciprocal Estimate and Square Root Estimate */ \
+ V(kRPRES, "RPRES", "rpres")
// clang-format on
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index 0ae6dfc0..53876869 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -1395,6 +1395,25 @@ T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
return 0;
}
+// Jenkins one-at-a-time hash, based on
+// https://en.wikipedia.org/wiki/Jenkins_hash_function citing
+// https://www.drdobbs.com/database/algorithm-alley/184410284.
+constexpr uint32_t Hash(const char* str, uint32_t hash = 0) {
+ if (*str == '\0') {
+ hash += hash << 3;
+ hash ^= hash >> 11;
+ hash += hash << 15;
+ return hash;
+ } else {
+ hash += *str;
+ hash += hash << 10;
+ hash ^= hash >> 6;
+ return Hash(str + 1, hash);
+ }
+}
+
+constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); }
+
} // namespace vixl
#endif // VIXL_UTILS_H
diff --git a/test/aarch32/test-assembler-aarch32.cc b/test/aarch32/test-assembler-aarch32.cc
index 0be51e15..418bc11f 100644
--- a/test/aarch32/test-assembler-aarch32.cc
+++ b/test/aarch32/test-assembler-aarch32.cc
@@ -2207,7 +2207,7 @@ TEST(custom_literal_place_shared) {
VIXL_CHECK(!after.IsBound());
// Load the entries several times to test that literals can be shared.
- for (int i = 0; i < 20; i++) {
+ for (int j = 0; j < 20; j++) {
(masm.*test_case.instruction)(r0, &before);
(masm.*test_case.instruction)(r1, &after);
}
@@ -5160,7 +5160,7 @@ TEST_T32(veneer_and_literal5) {
int first_test = 2000;
// Test on both sizes of the Adr range which is 4095.
- for (int test = 0; test < kTestCount; test++) {
+ for (int test_num = 0; test_num < kTestCount; test_num++) {
const int string_size = 1000; // A lot more than the cbz range.
std::string test_string(string_size, 'x');
StringLiteral big_literal(test_string.c_str());
@@ -5168,7 +5168,7 @@ TEST_T32(veneer_and_literal5) {
__ Adr(r11, &big_literal);
{
- int num_nops = first_test + test;
+ int num_nops = first_test + test_num;
ExactAssemblyScope aas(&masm,
2 * num_nops,
CodeBufferCheckScope::kMaximumSize);
@@ -5177,15 +5177,15 @@ TEST_T32(veneer_and_literal5) {
}
}
- __ Cbz(r1, &labels[test]);
+ __ Cbz(r1, &labels[test_num]);
{
ExactAssemblyScope aas(&masm, 4, CodeBufferCheckScope::kMaximumSize);
__ add(r1, r1, 3);
}
- __ Bind(&labels[test]);
+ __ Bind(&labels[test_num]);
// Emit the literal pool if it has not beeen emitted (it's the case for
- // the lower values of test).
+ // the lower values of test_num).
__ EmitLiteralPool(PoolManager<int32_t>::kBranchRequired);
}
@@ -6476,61 +6476,65 @@ TEST_T32(assembler_bind_label) {
POSITIVE_TEST_FORWARD_REFERENCE_INFO(INST, INFO, ASM)
#endif
-#define POSITIVE_TEST_FORWARD_REFERENCE_INFO(INST, INFO, ASM) \
- can_encode = masm.INFO; \
- VIXL_CHECK(can_encode); \
- { \
- ExactAssemblyScope scope(&masm, \
- info->size, \
- ExactAssemblyScope::kExactSize); \
- int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
- if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
- pc = AlignDown(pc, 4); \
- } \
- Label label(pc + info->min_offset); \
- masm.ASM; \
- } \
- { \
- ExactAssemblyScope scope(&masm, \
- info->size, \
- ExactAssemblyScope::kExactSize); \
- int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
- if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
- pc = AlignDown(pc, 4); \
- } \
- Label label(pc + info->max_offset); \
- masm.ASM; \
+#define POSITIVE_TEST_FORWARD_REFERENCE_INFO(INST, INFO, ASM) \
+ can_encode = masm.INFO; \
+ VIXL_CHECK(can_encode); \
+ { \
+ ExactAssemblyScope scope(&masm, \
+ info->size, \
+ ExactAssemblyScope::kExactSize); \
+ int32_t program_counter = \
+ masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
+ if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
+ program_counter = AlignDown(program_counter, 4); \
+ } \
+ Label label(program_counter + info->min_offset); \
+ masm.ASM; \
+ } \
+ { \
+ ExactAssemblyScope scope(&masm, \
+ info->size, \
+ ExactAssemblyScope::kExactSize); \
+ int32_t program_counter = \
+ masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
+ if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
+ program_counter = AlignDown(program_counter, 4); \
+ } \
+ Label label(program_counter + info->max_offset); \
+ masm.ASM; \
}
#ifdef VIXL_NEGATIVE_TESTING
-#define NEGATIVE_TEST_FORWARD_REFERENCE_INFO(INST, ASM) \
- try { \
- ExactAssemblyScope scope(&masm, \
- info->size, \
- ExactAssemblyScope::kMaximumSize); \
- int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
- if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
- pc = AlignDown(pc, 4); \
- } \
- Label label(pc + info->max_offset + info->alignment); \
- masm.ASM; \
- printf("Negative test for forward reference failed for %s.\n", INST); \
- abort(); \
- } catch (const std::runtime_error&) { \
- } \
- try { \
- ExactAssemblyScope scope(&masm, \
- info->size, \
- ExactAssemblyScope::kMaximumSize); \
- int32_t pc = masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
- if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
- pc = AlignDown(pc, 4); \
- } \
- Label label(pc + info->min_offset - info->alignment); \
- masm.ASM; \
- printf("Negative test for forward reference failed for %s.\n", INST); \
- abort(); \
- } catch (const std::runtime_error&) { \
+#define NEGATIVE_TEST_FORWARD_REFERENCE_INFO(INST, ASM) \
+ try { \
+ ExactAssemblyScope scope(&masm, \
+ info->size, \
+ ExactAssemblyScope::kMaximumSize); \
+ int32_t program_counter = \
+ masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
+ if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
+ program_counter = AlignDown(program_counter, 4); \
+ } \
+ Label label(program_counter + info->max_offset + info->alignment); \
+ masm.ASM; \
+ printf("Negative test for forward reference failed for %s.\n", INST); \
+ abort(); \
+ } catch (const std::runtime_error&) { \
+ } \
+ try { \
+ ExactAssemblyScope scope(&masm, \
+ info->size, \
+ ExactAssemblyScope::kMaximumSize); \
+ int32_t program_counter = \
+ masm.GetCursorOffset() + __ GetArchitectureStatePCOffset(); \
+ if (info->pc_needs_aligning == ReferenceInfo::kAlignPc) { \
+ program_counter = AlignDown(program_counter, 4); \
+ } \
+ Label label(program_counter + info->min_offset - info->alignment); \
+ masm.ASM; \
+ printf("Negative test for forward reference failed for %s.\n", INST); \
+ abort(); \
+ } catch (const std::runtime_error&) { \
}
#else
#define NEGATIVE_TEST_FORWARD_REFERENCE_INFO(INST, ASM)
diff --git a/test/aarch32/test-disasm-a32.cc b/test/aarch32/test-disasm-a32.cc
index efc997ff..c6acac97 100644
--- a/test/aarch32/test-disasm-a32.cc
+++ b/test/aarch32/test-disasm-a32.cc
@@ -348,8 +348,9 @@ namespace aarch32 {
class TestDisassembler : public PrintDisassembler {
public:
- TestDisassembler(std::ostream& os, uint32_t pc) // NOLINT(runtime/references)
- : PrintDisassembler(os, pc) {}
+ TestDisassembler(std::ostream& os,
+ uint32_t program_counter) // NOLINT(runtime/references)
+ : PrintDisassembler(os, program_counter) {}
virtual void PrintCodeAddress(uint32_t code_address) VIXL_OVERRIDE {
USE(code_address);
@@ -2507,38 +2508,44 @@ TEST(macro_assembler_PushRegisterList) {
"beq 0x00000006\n"
"push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ip}\n");
- COMPARE_A32(Push(RegisterList(sp)), "stmdb sp!, {sp}\n");
+ // Narrow form, T1.
+ COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n");
+ // <single_register_list> form, T4
+ COMPARE_T32(Pop(RegisterList(r10)), "pop {r10}\n");
- // TODO: Clarify behaviour of MacroAssembler vs Assembler with respect to
- // deprecated and unpredictable instructions. The tests reflect the
- // current behaviour and will need to be updated.
+ // It is usually UNPREDICTABLE to push sp.
+ MUST_FAIL_TEST_BOTH(Push(RegisterList(r0, sp)),
+ "Unpredictable instruction.\n");
+ MUST_FAIL_TEST_T32(Push(RegisterList(sp)), "Unpredictable instruction.\n");
+ MUST_FAIL_TEST_T32(Push(sp), "Unpredictable instruction.\n");
+ // A32 can push sp if it is the first register in the list.
+ COMPARE_A32(Push(sp), "stmdb sp!, {sp}\n");
+ COMPARE_A32(Push(RegisterList(sp)), "stmdb sp!, {sp}\n");
+ COMPARE_A32(Push(RegisterList(sp, lr)), "push {sp,lr}\n");
// Deprecated, but accepted:
+ SHOULD_FAIL_TEST_A32(Push(pc));
SHOULD_FAIL_TEST_A32(Push(RegisterList(pc)));
- // Whereas we don't accept the single-register version:
- MUST_FAIL_TEST_BOTH(Push(pc), "Unpredictable instruction.\n");
-
- // Accepted, but stores UNKNOWN value for the SP:
- SHOULD_FAIL_TEST_A32(Push(RegisterList(r0, sp)));
-
- // The following use the T1 and A1 encodings for T32 and A32 respectively, and
- // hence have different preferred disassembly.
- COMPARE_T32(Push(RegisterList(r0)), "push {r0}\n");
- COMPARE_A32(Push(RegisterList(r0)), "stmdb sp!, {r0}\n");
- COMPARE_T32(Push(RegisterList(r7)), "push {r7}\n");
- COMPARE_A32(Push(RegisterList(r7)), "stmdb sp!, {r7}\n");
- COMPARE_T32(Push(RegisterList(lr)), "push {lr}\n");
- COMPARE_A32(Push(RegisterList(lr)), "stmdb sp!, {lr}\n");
-
- // T2 and A1 encodings, with the same preferred disassembly:
- COMPARE_BOTH(Push(RegisterList(r8)), "stmdb sp!, {r8}\n");
-
- // Cannot push the sp and pc in T32 when using a register list.
- MUST_FAIL_TEST_T32(Push(RegisterList(sp)),
- "Ill-formed 'push' instruction.\n");
- MUST_FAIL_TEST_T32(Push(RegisterList(pc)),
+ SHOULD_FAIL_TEST_A32(Push(RegisterList(r0, pc)));
+
+ MUST_FAIL_TEST_T32(Push(pc), "Unpredictable instruction.\n");
+ MUST_FAIL_TEST_T32(Push(RegisterList(pc)), "Unpredictable instruction.\n");
+ // The multiple-register T32 push can't encode PC at all.
+ MUST_FAIL_TEST_T32(Push(RegisterList(r0, pc)),
"Ill-formed 'push' instruction.\n");
+ // The following use the PUSH (T1) and PUSH (single register) (A1) encodings
+ // for T32 and A32 respectively:
+ COMPARE_BOTH(Push(RegisterList(r0)), "push {r0}\n");
+ COMPARE_BOTH(Push(RegisterList(r7)), "push {r7}\n");
+ COMPARE_BOTH(Push(RegisterList(lr)), "push {lr}\n");
+
+ // PUSH (single register), T4 and A1 encodings:
+ COMPARE_BOTH(Push(RegisterList(r8)), "push {r8}\n");
+
+ // Pushing zero registers should produce no instructions.
+ COMPARE_BOTH(Push(RegisterList()), "");
+
CLEANUP();
}
@@ -2564,29 +2571,33 @@ TEST(macro_assembler_PopRegisterList) {
"beq 0x00000006\n"
"pop {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ip}\n");
- // TODO: Accepted, but value of SP after the instruction is UNKNOWN:
- SHOULD_FAIL_TEST_A32(Pop(RegisterList(sp)));
+ // Narrow form, T1.
+ COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n");
+ // <single_register_list> form, T4.
+ COMPARE_T32(Pop(RegisterList(r10)), "pop {r10}\n");
- // Cannot pop the sp in T32 when using a register list.
- MUST_FAIL_TEST_T32(Pop(RegisterList(sp)), "Ill-formed 'pop' instruction.\n");
+ // It is UNPREDICTABLE to pop sp.
+ MUST_FAIL_TEST_BOTH(Pop(RegisterList(r0, sp)),
+ "Unpredictable instruction.\n");
+ MUST_FAIL_TEST_BOTH(Pop(RegisterList(sp)), "Unpredictable instruction.\n");
+ MUST_FAIL_TEST_BOTH(Pop(sp), "Unpredictable instruction.\n");
- // The following use the T1 and A1 encodings for T32 and A32 respectively, and
- // hence have different preferred disassembly.
- COMPARE_T32(Pop(RegisterList(pc)), "pop {pc}\n");
- COMPARE_A32(Pop(RegisterList(pc)), "ldm sp!, {pc}\n");
- COMPARE_T32(Pop(RegisterList(r0)), "pop {r0}\n");
- COMPARE_A32(Pop(RegisterList(r0)), "ldm sp!, {r0}\n");
- COMPARE_T32(Pop(RegisterList(r7)), "pop {r7}\n");
- COMPARE_A32(Pop(RegisterList(r7)), "ldm sp!, {r7}\n");
-
- // T2 and A1 encodings, with the same preferred disassembly:
- COMPARE_BOTH(Pop(RegisterList(r8)), "ldm sp!, {r8}\n");
- COMPARE_BOTH(Pop(RegisterList(lr)), "ldm sp!, {lr}\n");
-
- // TODO: Pushing both the lr and pc should not be allowed by the
- // MacroAssembler (deprecated for A32, for T32 they shouldn't both
- // be in the list).
- SHOULD_FAIL_TEST_BOTH(Pop(RegisterList(lr, pc)));
+ // The following use the POP (T1) and POP (single register) (A1) encodings for
+ // T32 and A32 respectively:
+ COMPARE_BOTH(Pop(RegisterList(pc)), "pop {pc}\n");
+ COMPARE_BOTH(Pop(RegisterList(r0)), "pop {r0}\n");
+ COMPARE_BOTH(Pop(RegisterList(r7)), "pop {r7}\n");
+
+ // POP (single register), T4 and A1 encodings:
+ COMPARE_BOTH(Pop(RegisterList(r8)), "pop {r8}\n");
+ COMPARE_BOTH(Pop(RegisterList(lr)), "pop {lr}\n");
+
+ MUST_FAIL_TEST_T32(Pop(RegisterList(lr, pc)), "Unpredictable instruction.\n");
+ // Deprecated, but allowed.
+ COMPARE_A32(Pop(RegisterList(lr, pc)), "pop {lr,pc}\n");
+
+ // Popping zero registers should produce no instructions.
+ COMPARE_BOTH(Pop(RegisterList()), "");
CLEANUP();
}
diff --git a/test/aarch64/test-api-movprfx-aarch64.cc b/test/aarch64/test-api-movprfx-aarch64.cc
index 1c1bceec..535ae0bf 100644
--- a/test/aarch64/test-api-movprfx-aarch64.cc
+++ b/test/aarch64/test-api-movprfx-aarch64.cc
@@ -41,19 +41,42 @@
namespace vixl {
namespace aarch64 {
+class InstructionReporter : public DecoderVisitor {
+ public:
+ InstructionReporter() : DecoderVisitor(kNonConstVisitor) {}
+
+ void Visit(Metadata* metadata, const Instruction* instr) VIXL_OVERRIDE {
+ USE(instr);
+ instr_form_ = (*metadata)["form"];
+ }
+
+ std::string MoveForm() { return std::move(instr_form_); }
+
+ private:
+ std::string instr_form_;
+};
+
static void CheckAndMaybeDisassembleMovprfxPairs(const CodeBuffer* buffer,
bool can_take_movprfx) {
const Instruction* pair = buffer->GetStartAddress<Instruction*>();
const Instruction* end = buffer->GetEndAddress<Instruction*>();
bool any_failures = false;
PrintDisassembler print_disasm(stdout);
+ Decoder decoder;
+ InstructionReporter reporter;
+ decoder.AppendVisitor(&reporter);
+
while (pair < end) {
const Instruction* movprfx = pair;
const Instruction* candidate = pair->GetNextInstruction();
const Instruction* next_pair = candidate->GetNextInstruction();
VIXL_ASSERT(candidate < end);
- bool failed = can_take_movprfx != candidate->CanTakeSVEMovprfx(movprfx);
+ Instr inst = candidate->GetInstructionBits();
+ decoder.Decode(reinterpret_cast<Instruction*>(&inst));
+ std::string form = reporter.MoveForm();
+ bool failed =
+ can_take_movprfx != candidate->CanTakeSVEMovprfx(form.c_str(), movprfx);
any_failures = any_failures || failed;
if (failed || Test::disassemble()) {
@@ -75,11 +98,11 @@ TEST(movprfx_negative_aliasing) {
// Test that CanTakeSVEMovprfx() checks that the movprfx destination does not
// alias an input to the prefixed instruction.
Assembler assm;
- assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM);
{
// We have to use the Assembler directly to generate movprfx, so we need
// to manually reserve space for the code we're about to emit.
- static const size_t kPairCount = 73;
+ static const size_t kPairCount = 79;
CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
__ movprfx(z0.VnB(), p0.Merging(), z9.VnB());
@@ -300,6 +323,24 @@ TEST(movprfx_negative_aliasing) {
__ movprfx(z14, z5);
__ uxtw(z14.VnD(), p3.Merging(), z14.VnD());
+
+ __ movprfx(z22, z5);
+ __ smmla(z22.VnS(), z22.VnB(), z0.VnB());
+
+ __ movprfx(z1, z5);
+ __ ummla(z1.VnS(), z10.VnB(), z1.VnB());
+
+ __ movprfx(z30, z5);
+ __ usmmla(z30.VnS(), z30.VnB(), z18.VnB());
+
+ __ movprfx(z4, z5);
+ __ usdot(z4.VnS(), z3.VnB(), z4.VnB());
+
+ __ movprfx(z10, z5);
+ __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0);
+
+ __ movprfx(z1, z5);
+ __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1);
}
assm.FinalizeCode();
@@ -310,11 +351,13 @@ TEST(movprfx_negative_aliasing_fp) {
// Test that CanTakeSVEMovprfx() checks that the movprfx destination does not
// alias an input to the prefixed instruction.
Assembler assm;
- assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
+ CPUFeatures::kSVEF32MM,
+ CPUFeatures::kSVEF64MM);
{
// We have to use the Assembler directly to generate movprfx, so we need
// to manually reserve space for the code we're about to emit.
- static const size_t kPairCount = 78;
+ static const size_t kPairCount = 80;
CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
__ movprfx(z17.VnS(), p1.Zeroing(), z12.VnS());
@@ -550,6 +593,12 @@ TEST(movprfx_negative_aliasing_fp) {
__ movprfx(z0.VnD(), p5.Zeroing(), z12.VnD());
__ ucvtf(z0.VnH(), p5.Merging(), z0.VnD());
+
+ __ movprfx(z30, z5);
+ __ fmmla(z30.VnS(), z30.VnS(), z18.VnS());
+
+ __ movprfx(z31, z5);
+ __ fmmla(z31.VnD(), z31.VnD(), z18.VnD());
}
assm.FinalizeCode();
@@ -1035,11 +1084,11 @@ TEST(movprfx_negative_predication) {
// Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears
// before an unpredicated instruction.
Assembler assm;
- assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM);
{
// We have to use the Assembler directly to generate movprfx, so we need
// to manually reserve space for the code we're about to emit.
- static const size_t kPairCount = 54;
+ static const size_t kPairCount = 60;
CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
__ movprfx(z27.VnS(), p1.Zeroing(), z12.VnS());
@@ -1203,6 +1252,24 @@ TEST(movprfx_negative_predication) {
__ movprfx(z9.VnD(), p0.Zeroing(), z16.VnD());
__ uqsub(z9.VnD(), z9.VnD(), 42);
+
+ __ movprfx(z22.VnS(), p0.Zeroing(), z5.VnS());
+ __ smmla(z22.VnS(), z21.VnB(), z0.VnB());
+
+ __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS());
+ __ ummla(z1.VnS(), z10.VnB(), z2.VnB());
+
+ __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS());
+ __ usmmla(z30.VnS(), z29.VnB(), z18.VnB());
+
+ __ movprfx(z4.VnS(), p0.Zeroing(), z5.VnS());
+ __ usdot(z4.VnS(), z3.VnB(), z4.VnB());
+
+ __ movprfx(z10.VnS(), p0.Zeroing(), z5.VnS());
+ __ usdot(z10.VnS(), z10.VnB(), z0.VnB(), 0);
+
+ __ movprfx(z1.VnS(), p0.Zeroing(), z5.VnS());
+ __ sudot(z1.VnS(), z10.VnB(), z1.VnB(), 1);
}
assm.FinalizeCode();
@@ -1213,11 +1280,13 @@ TEST(movprfx_negative_predication_fp) {
// Test that CanTakeSVEMovprfx() is false when a predicated movprfx appears
// before an unpredicated instruction.
Assembler assm;
- assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
+ CPUFeatures::kSVEF32MM,
+ CPUFeatures::kSVEF64MM);
{
// We have to use the Assembler directly to generate movprfx, so we need
// to manually reserve space for the code we're about to emit.
- static const size_t kPairCount = 9;
+ static const size_t kPairCount = 11;
CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
__ movprfx(z10.VnH(), p3.Zeroing(), z3.VnH());
@@ -1244,9 +1313,15 @@ TEST(movprfx_negative_predication_fp) {
__ movprfx(z2.VnS(), p1.Zeroing(), z0.VnS());
__ fmls(z2.VnS(), z9.VnS(), z0.VnS(), 3);
- // Note that ftsmul and ftssel _cannot_ take movprfx.
+ // Note that ftsmul and ftssel cannot take movprfx.
__ movprfx(z22.VnD(), p6.Merging(), z16.VnD());
__ ftmad(z22.VnD(), z22.VnD(), z20.VnD(), 2);
+
+ __ movprfx(z30.VnS(), p0.Zeroing(), z5.VnS());
+ __ fmmla(z30.VnS(), z29.VnS(), z18.VnS());
+
+ __ movprfx(z31.VnD(), p1.Merging(), z5.VnD());
+ __ fmmla(z31.VnD(), z30.VnD(), z18.VnD());
}
assm.FinalizeCode();
@@ -1255,11 +1330,11 @@ TEST(movprfx_negative_predication_fp) {
TEST(movprfx_positive) {
Assembler assm;
- assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM);
{
// We have to use the Assembler directly to generate movprfx, so we need
// to manually reserve space for the code we're about to emit.
- static const size_t kPairCount = 117;
+ static const size_t kPairCount = 123;
CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
__ movprfx(z17, z28);
@@ -1349,9 +1424,6 @@ TEST(movprfx_positive) {
__ movprfx(z15, z18);
__ eor(z15.VnH(), z15.VnH(), 4);
- __ movprfx(z30, z11);
- __ ext(z30.VnB(), z30.VnB(), z11.VnB(), 42);
-
__ movprfx(z19, z28);
__ incd(z19.VnD(), SVE_MUL3);
@@ -1613,6 +1685,24 @@ TEST(movprfx_positive) {
__ movprfx(z18.VnD(), p7.Merging(), z25.VnD());
__ uxtw(z18.VnD(), p7.Merging(), z25.VnD());
+
+ __ movprfx(z22, z5);
+ __ smmla(z22.VnS(), z21.VnB(), z0.VnB());
+
+ __ movprfx(z1, z5);
+ __ ummla(z1.VnS(), z10.VnB(), z0.VnB());
+
+ __ movprfx(z30, z5);
+ __ usmmla(z30.VnS(), z31.VnB(), z18.VnB());
+
+ __ movprfx(z4, z5);
+ __ usdot(z4.VnS(), z3.VnB(), z3.VnB());
+
+ __ movprfx(z10, z5);
+ __ usdot(z10.VnS(), z9.VnB(), z0.VnB(), 0);
+
+ __ movprfx(z1, z5);
+ __ sudot(z1.VnS(), z10.VnB(), z2.VnB(), 1);
}
assm.FinalizeCode();
@@ -1621,11 +1711,13 @@ TEST(movprfx_positive) {
TEST(movprfx_positive_fp) {
Assembler assm;
- assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
+ CPUFeatures::kSVEF32MM,
+ CPUFeatures::kSVEF64MM);
{
// We have to use the Assembler directly to generate movprfx, so we need
// to manually reserve space for the code we're about to emit.
- static const size_t kPairCount = 73;
+ static const size_t kPairCount = 75;
CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
__ movprfx(z18.VnS(), p6.Zeroing(), z20.VnS());
@@ -1848,12 +1940,1775 @@ TEST(movprfx_positive_fp) {
__ movprfx(z17.VnD(), p4.Merging(), z22.VnD());
__ ucvtf(z17.VnH(), p4.Merging(), z4.VnD());
+
+ __ movprfx(z30, z5);
+ __ fmmla(z30.VnS(), z29.VnS(), z18.VnS());
+
+ __ movprfx(z31, z5);
+ __ fmmla(z31.VnD(), z30.VnD(), z18.VnD());
}
assm.FinalizeCode();
CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true);
}
+TEST(movprfx_positive_sve2) {
+ Assembler assm;
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+ {
+ // We have to use the Assembler directly to generate movprfx, so we need
+ // to manually reserve space for the code we're about to emit.
+ static const size_t kPairCount = 145;
+ CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
+
+ __ movprfx(z25, z26);
+ __ adclb(z25.VnS(), z17.VnS(), z24.VnS());
+
+ __ movprfx(z0, z1);
+ __ adclt(z0.VnS(), z2.VnS(), z15.VnS());
+
+ __ movprfx(z3, z4);
+ __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB());
+
+ __ movprfx(z6, z7);
+ __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD());
+
+ __ movprfx(z18, z19);
+ __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD());
+
+ __ movprfx(z7, z8);
+ __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD());
+
+ __ movprfx(z21, z22);
+ __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD());
+
+ __ movprfx(z5, z6);
+ __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90);
+
+ __ movprfx(z7, z8);
+ __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0);
+
+ __ movprfx(z7, z8);
+ __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0);
+
+ __ movprfx(z7, z8);
+ __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0);
+
+ __ movprfx(z19, z20);
+ __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0);
+
+ __ movprfx(z19, z20);
+ __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0);
+
+ __ movprfx(z19, z20);
+ __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0);
+
+ __ movprfx(z10, z11);
+ __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD());
+
+ __ movprfx(z3, z4);
+ __ eorbt(z3.VnB(), z10.VnB(), z8.VnB());
+
+ __ movprfx(z20, z22);
+ __ eortb(z20.VnB(), z21.VnB(), z15.VnB());
+
+ __ movprfx(z14, z15);
+ __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD());
+
+ __ movprfx(z14.VnD(), p4.Merging(), z15.VnD());
+ __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD());
+
+ __ movprfx(z15.VnH(), p0.Merging(), z16.VnH());
+ __ flogb(z15.VnH(), p0.Merging(), z3.VnH());
+
+ __ movprfx(z2, z3);
+ __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD());
+
+ __ movprfx(z22, z23);
+ __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD());
+
+ __ movprfx(z1, z2);
+ __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD());
+
+ __ movprfx(z16, z17);
+ __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD());
+
+ __ movprfx(z16, z17);
+ __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH());
+
+ __ movprfx(z16, z17);
+ __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z18, z19);
+ __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH());
+
+ __ movprfx(z18, z19);
+ __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0);
+
+ __ movprfx(z16, z17);
+ __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH());
+
+ __ movprfx(z16, z17);
+ __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z3, z4);
+ __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH());
+
+ __ movprfx(z3, z4);
+ __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z2, z3);
+ __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z2, z3);
+ __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z2, z3);
+ __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z2, z3);
+ __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z2, z3);
+ __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z2, z3);
+ __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z17, z18);
+ __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD());
+
+ __ movprfx(z13, z14);
+ __ saba(z13.VnB(), z2.VnB(), z31.VnB());
+
+ __ movprfx(z13, z14);
+ __ sabalb(z13.VnD(), z20.VnS(), z26.VnS());
+
+ __ movprfx(z14, z15);
+ __ sabalt(z14.VnD(), z19.VnS(), z10.VnS());
+
+ __ movprfx(z19.VnD(), p5.Merging(), z20.VnD());
+ __ sadalp(z19.VnD(), p5.Merging(), z9.VnS());
+
+ __ movprfx(z17, z18);
+ __ sbclb(z17.VnS(), z10.VnS(), z8.VnS());
+
+ __ movprfx(z20, z21);
+ __ sbclt(z20.VnS(), z0.VnS(), z13.VnS());
+
+ __ movprfx(z20.VnB(), p3.Merging(), z21.VnB());
+ __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB());
+
+ __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
+ __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB());
+
+ __ movprfx(z1.VnB(), p0.Merging(), z2.VnB());
+ __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB());
+
+ __ movprfx(z5, z6);
+ __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB());
+
+ __ movprfx(z27, z28);
+ __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB());
+
+ __ movprfx(z1, z2);
+ __ smlalb(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlalt(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslb(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslt(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z29.VnB(), p1.Merging(), z30.VnB());
+ __ sqabs(z29.VnB(), p1.Merging(), z18.VnB());
+
+ __ movprfx(z28.VnB(), p0.Merging(), z29.VnB());
+ __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB());
+
+ __ movprfx(z20, z21);
+ __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90);
+
+ __ movprfx(z6, z7);
+ __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS());
+
+ __ movprfx(z6, z7);
+ __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z6, z7);
+ __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z23, z24);
+ __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS());
+
+ __ movprfx(z11, z12);
+ __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS());
+
+ __ movprfx(z11, z12);
+ __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z11, z12);
+ __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0);
+
+ __ movprfx(z16, z17);
+ __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS());
+
+ __ movprfx(z16, z17);
+ __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z16, z17);
+ __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z26, z27);
+ __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS());
+
+ __ movprfx(z21, z22);
+ __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS());
+
+ __ movprfx(z21, z22);
+ __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z21, z22);
+ __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0);
+
+ __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
+ __ sqneg(z21.VnB(), p0.Merging(), z17.VnB());
+
+ __ movprfx(z31, z0);
+ __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0);
+
+ __ movprfx(z31, z0);
+ __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0);
+
+ __ movprfx(z31, z0);
+ __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0);
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB());
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB());
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z31.VnB(), p5.Merging(), z0.VnB());
+ __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB());
+
+ __ movprfx(z25.VnB(), p6.Merging(), z26.VnB());
+ __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB());
+
+ __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
+ __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0);
+
+ __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
+ __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB());
+
+ __ movprfx(z7.VnB(), p3.Merging(), z8.VnB());
+ __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB());
+
+ __ movprfx(z10.VnB(), p1.Merging(), z11.VnB());
+ __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0);
+
+ __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
+ __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
+
+ __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
+ __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
+
+ __ movprfx(z23.VnB(), p4.Merging(), z24.VnB());
+ __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB());
+
+ __ movprfx(z31.VnB(), p7.Merging(), z0.VnB());
+ __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB());
+
+ __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
+ __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB());
+
+ __ movprfx(z12.VnB(), p0.Merging(), z13.VnB());
+ __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1);
+
+ __ movprfx(z0, z1);
+ __ srsra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z0, z1);
+ __ ssra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z26.VnB(), p2.Merging(), z27.VnB());
+ __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB());
+
+ __ movprfx(z23, z24);
+ __ uaba(z23.VnB(), z22.VnB(), z20.VnB());
+
+ __ movprfx(z11, z12);
+ __ uabalb(z11.VnD(), z25.VnS(), z12.VnS());
+
+ __ movprfx(z4, z5);
+ __ uabalt(z4.VnD(), z2.VnS(), z31.VnS());
+
+ __ movprfx(z20.VnD(), p4.Merging(), z21.VnD());
+ __ uadalp(z20.VnD(), p4.Merging(), z5.VnS());
+
+ __ movprfx(z21.VnB(), p2.Merging(), z22.VnB());
+ __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB());
+
+ __ movprfx(z1.VnB(), p4.Merging(), z2.VnB());
+ __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB());
+
+ __ movprfx(z18.VnB(), p0.Merging(), z19.VnB());
+ __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB());
+
+ __ movprfx(z7, z8);
+ __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB());
+
+ __ movprfx(z10, z11);
+ __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB());
+
+ __ movprfx(z31, z0);
+ __ umlalb(z31.VnD(), z9.VnS(), z21.VnS());
+
+ __ movprfx(z31, z0);
+ __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z31, z0);
+ __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z11, z12);
+ __ umlalt(z11.VnD(), z5.VnS(), z22.VnS());
+
+ __ movprfx(z11, z12);
+ __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z11, z12);
+ __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z28, z29);
+ __ umlslb(z28.VnD(), z13.VnS(), z9.VnS());
+
+ __ movprfx(z28, z29);
+ __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z28, z29);
+ __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z9, z10);
+ __ umlslt(z9.VnD(), z12.VnS(), z30.VnS());
+
+ __ movprfx(z9, z10);
+ __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z9, z10);
+ __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0);
+
+ __ movprfx(z24.VnB(), p7.Merging(), z25.VnB());
+    __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB());
+
+ __ movprfx(z20.VnB(), p1.Merging(), z21.VnB());
+ __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB());
+
+ __ movprfx(z8.VnB(), p5.Merging(), z9.VnB());
+ __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB());
+
+ __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
+ __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0);
+
+ __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
+ __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB());
+
+ __ movprfx(z12.VnB(), p1.Merging(), z13.VnB());
+ __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB());
+
+ __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
+ __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
+
+ __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
+ __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
+
+ __ movprfx(z25.VnS(), p7.Merging(), z26.VnS());
+ __ urecpe(z25.VnS(), p7.Merging(), z2.VnS());
+
+ __ movprfx(z29.VnB(), p4.Merging(), z30.VnB());
+ __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB());
+
+ __ movprfx(z15.VnB(), p2.Merging(), z16.VnB());
+ __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB());
+
+ __ movprfx(z27.VnB(), p1.Merging(), z28.VnB());
+ __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB());
+
+ __ movprfx(z31.VnB(), p2.Merging(), z0.VnB());
+ __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1);
+
+ __ movprfx(z4.VnS(), p3.Merging(), z5.VnS());
+ __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS());
+
+ __ movprfx(z0, z1);
+ __ ursra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z25.VnB(), p4.Merging(), z26.VnB());
+ __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB());
+
+ __ movprfx(z0, z1);
+ __ usra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z16, z17);
+ __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1);
+ }
+ assm.FinalizeCode();
+
+ CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), true);
+}
+
+TEST(movprfx_negative_instructions_sve2) {
+ Assembler assm;
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kSVEBitPerm);
+ {
+ // We have to use the Assembler directly to generate movprfx, so we need
+ // to manually reserve space for the code we're about to emit.
+ static const size_t kPairCount = 133;
+ CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
+
+ __ movprfx(z29, z30);
+ __ addhnb(z29.VnS(), z19.VnD(), z2.VnD());
+
+ __ movprfx(z8, z9);
+ __ addhnt(z8.VnS(), z12.VnD(), z6.VnD());
+
+ __ movprfx(z18, z19);
+ __ bdep(z18.VnB(), z10.VnB(), z0.VnB());
+
+ __ movprfx(z6, z7);
+ __ bext(z6.VnB(), z2.VnB(), z5.VnB());
+
+ __ movprfx(z24, z25);
+ __ bgrp(z24.VnB(), z9.VnB(), z5.VnB());
+
+ __ movprfx(z1, z2);
+ __ fcvtlt(z1.VnD(), p1.Merging(), z28.VnS());
+
+ __ movprfx(z1, z2);
+ __ fcvtlt(z1.VnS(), p1.Merging(), z28.VnH());
+
+ __ movprfx(z4, z5);
+ __ fcvtnt(z4.VnH(), p7.Merging(), z0.VnS());
+
+ __ movprfx(z4, z5);
+ __ fcvtnt(z4.VnS(), p7.Merging(), z0.VnD());
+
+ __ movprfx(z27, z28);
+ __ fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD());
+
+ __ movprfx(z24, z25);
+ __ histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS());
+
+ __ movprfx(z22, z23);
+ __ histseg(z22.VnB(), z14.VnB(), z8.VnB());
+
+ __ movprfx(z21, z22);
+ __ ldnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z21.VnS(), x23));
+
+ __ movprfx(z21, z22);
+ __ ldnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));
+
+ __ movprfx(z10, z11);
+ __ ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z23.VnD(), x6));
+
+ __ movprfx(z30, z31);
+ __ ldnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x11));
+
+ __ movprfx(z30, z31);
+ __ ldnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x11));
+
+ __ movprfx(z7, z8);
+ __ ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11));
+
+ __ movprfx(z7, z8);
+ __ ldnt1sb(z7.VnD(), p3.Zeroing(), SVEMemOperand(z18.VnD(), x11));
+
+ __ movprfx(z17, z18);
+ __ ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19));
+
+ __ movprfx(z17, z18);
+ __ ldnt1sh(z17.VnD(), p5.Zeroing(), SVEMemOperand(z31.VnD(), x19));
+
+ __ movprfx(z3, z4);
+ __ ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10));
+
+ __ movprfx(z0, z1);
+ __ ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));
+
+ __ movprfx(z0, z1);
+ __ ldnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));
+
+ __ movprfx(z18, z19);
+ __ match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB());
+
+ __ movprfx(z15, z16);
+ __ mul(z15.VnB(), z15.VnB(), z15.VnB());
+
+ __ movprfx(z15, z16);
+ __ mul(z15.VnH(), z15.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z15, z16);
+ __ mul(z15.VnS(), z15.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z15, z16);
+ __ mul(z15.VnD(), z15.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z20, z21);
+ __ nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB());
+
+ __ movprfx(z0, z1);
+ __ pmul(z0.VnB(), z5.VnB(), z5.VnB());
+
+ __ movprfx(z12, z13);
+ __ pmullb(z12.VnD(), z21.VnS(), z12.VnS());
+
+ __ movprfx(z31, z0);
+ __ pmullt(z31.VnD(), z30.VnS(), z26.VnS());
+
+ __ movprfx(z0, z1);
+ __ raddhnb(z0.VnS(), z11.VnD(), z10.VnD());
+
+ __ movprfx(z23, z24);
+ __ raddhnt(z23.VnS(), z27.VnD(), z9.VnD());
+
+ __ movprfx(z5, z6);
+ __ rshrnb(z5.VnB(), z1.VnH(), 1);
+
+ __ movprfx(z5, z6);
+ __ rshrnt(z5.VnB(), z1.VnH(), 8);
+
+ __ movprfx(z30, z31);
+ __ rsubhnb(z30.VnS(), z29.VnD(), z11.VnD());
+
+ __ movprfx(z25, z26);
+ __ rsubhnt(z25.VnS(), z7.VnD(), z18.VnD());
+
+ __ movprfx(z2, z3);
+ __ sabdlb(z2.VnD(), z21.VnS(), z3.VnS());
+
+ __ movprfx(z25, z26);
+ __ sabdlt(z25.VnD(), z23.VnS(), z17.VnS());
+
+ __ movprfx(z24, z25);
+ __ saddlb(z24.VnD(), z30.VnS(), z16.VnS());
+
+ __ movprfx(z15, z16);
+ __ saddlbt(z15.VnD(), z6.VnS(), z18.VnS());
+
+ __ movprfx(z21, z22);
+ __ saddlt(z21.VnD(), z29.VnS(), z31.VnS());
+
+ __ movprfx(z12, z13);
+ __ saddwb(z12.VnD(), z8.VnD(), z8.VnS());
+
+ __ movprfx(z24, z25);
+ __ saddwt(z24.VnD(), z0.VnD(), z3.VnS());
+
+ __ movprfx(z7, z8);
+ __ shrnb(z7.VnB(), z4.VnH(), 1);
+
+ __ movprfx(z21, z22);
+ __ shrnt(z21.VnB(), z29.VnH(), 1);
+
+ __ movprfx(z29, z30);
+ __ sli(z29.VnB(), z7.VnB(), 0);
+
+ __ movprfx(z23, z24);
+ __ smulh(z23.VnB(), z23.VnB(), z3.VnB());
+
+ __ movprfx(z10, z11);
+ __ smullb(z10.VnD(), z4.VnS(), z4.VnS());
+
+ __ movprfx(z10, z11);
+ __ smullb(z10.VnS(), z4.VnH(), z4.VnH(), 0);
+
+ __ movprfx(z10, z11);
+ __ smullb(z10.VnD(), z4.VnS(), z4.VnS(), 0);
+
+ __ movprfx(z31, z0);
+ __ smullt(z31.VnD(), z26.VnS(), z5.VnS());
+
+ __ movprfx(z31, z0);
+ __ smullt(z31.VnS(), z26.VnH(), z5.VnH(), 0);
+
+ __ movprfx(z31, z0);
+ __ smullt(z31.VnD(), z26.VnS(), z5.VnS(), 0);
+
+ __ movprfx(z18, z19);
+ __ sqdmulh(z18.VnB(), z25.VnB(), z1.VnB());
+
+ __ movprfx(z18, z19);
+ __ sqdmulh(z18.VnH(), z25.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z18, z19);
+ __ sqdmulh(z18.VnS(), z25.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z18, z19);
+ __ sqdmulh(z18.VnD(), z25.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z1, z2);
+ __ sqdmullb(z1.VnD(), z31.VnS(), z21.VnS());
+
+ __ movprfx(z1, z2);
+ __ sqdmullb(z1.VnS(), z31.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z1, z2);
+ __ sqdmullb(z1.VnD(), z31.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z2, z3);
+ __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS());
+
+ __ movprfx(z2, z3);
+ __ sqdmullt(z2.VnS(), z1.VnH(), z5.VnH(), 0);
+
+ __ movprfx(z2, z3);
+ __ sqdmullt(z2.VnD(), z1.VnS(), z5.VnS(), 0);
+
+ __ movprfx(z21, z22);
+ __ sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB());
+
+ __ movprfx(z21, z22);
+ __ sqrdmulh(z21.VnH(), z21.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z21, z22);
+ __ sqrdmulh(z21.VnS(), z21.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z21, z22);
+ __ sqrdmulh(z21.VnD(), z21.VnD(), z2.VnD(), 0);
+
+ __ movprfx(z1, z2);
+ __ sqrshrnb(z1.VnB(), z1.VnH(), 1);
+
+ __ movprfx(z24, z25);
+ __ sqrshrnt(z24.VnB(), z19.VnH(), 8);
+
+ __ movprfx(z23, z24);
+ __ sqrshrunb(z23.VnB(), z28.VnH(), 1);
+
+ __ movprfx(z9, z10);
+ __ sqrshrunt(z9.VnB(), z15.VnH(), 8);
+
+ __ movprfx(z25, z26);
+ __ sqshrnb(z25.VnB(), z1.VnH(), 1);
+
+ __ movprfx(z0, z1);
+ __ sqshrnt(z0.VnB(), z25.VnH(), 8);
+
+ __ movprfx(z25, z26);
+ __ sqshrunb(z25.VnB(), z10.VnH(), 1);
+
+ __ movprfx(z20, z21);
+ __ sqshrunt(z20.VnB(), z3.VnH(), 8);
+
+ __ movprfx(z2, z3);
+ __ sqxtnb(z2.VnB(), z0.VnH());
+
+ __ movprfx(z31, z0);
+ __ sqxtnt(z31.VnB(), z18.VnH());
+
+ __ movprfx(z28, z29);
+ __ sqxtunb(z28.VnB(), z6.VnH());
+
+ __ movprfx(z14, z15);
+ __ sqxtunt(z14.VnB(), z31.VnH());
+
+ __ movprfx(z6, z7);
+ __ sri(z6.VnB(), z9.VnB(), 1);
+
+ __ movprfx(z2, z3);
+ __ sshllb(z2.VnH(), z20.VnB(), 0);
+
+ __ movprfx(z27, z28);
+ __ sshllt(z27.VnH(), z8.VnB(), 0);
+
+ __ movprfx(z4, z5);
+ __ ssublb(z4.VnD(), z23.VnS(), z7.VnS());
+
+ __ movprfx(z6, z7);
+ __ ssublbt(z6.VnD(), z28.VnS(), z12.VnS());
+
+ __ movprfx(z12, z13);
+ __ ssublt(z12.VnD(), z13.VnS(), z6.VnS());
+
+ __ movprfx(z11, z12);
+ __ ssubltb(z11.VnD(), z18.VnS(), z19.VnS());
+
+ __ movprfx(z7, z8);
+ __ ssubwb(z7.VnD(), z28.VnD(), z11.VnS());
+
+ __ movprfx(z29, z30);
+ __ ssubwt(z29.VnD(), z25.VnD(), z20.VnS());
+
+ __ movprfx(z21, z22);
+ __ stnt1b(z21.VnS(), p5.Zeroing(), SVEMemOperand(z1.VnS(), x23));
+
+ __ movprfx(z21, z22);
+ __ stnt1b(z21.VnD(), p5.Zeroing(), SVEMemOperand(z1.VnD(), x23));
+
+ __ movprfx(z10, z11);
+ __ stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x23));
+
+ __ movprfx(z30, z31);
+ __ stnt1h(z30.VnS(), p4.Zeroing(), SVEMemOperand(z6.VnS(), x6));
+
+ __ movprfx(z30, z31);
+ __ stnt1h(z30.VnD(), p4.Zeroing(), SVEMemOperand(z6.VnD(), x6));
+
+ __ movprfx(z0, z1);
+ __ stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(z11.VnS(), x1));
+
+ __ movprfx(z0, z1);
+ __ stnt1w(z0.VnD(), p4.Zeroing(), SVEMemOperand(z11.VnD(), x1));
+
+ __ movprfx(z31, z0);
+ __ subhnb(z31.VnS(), z31.VnD(), z7.VnD());
+
+ __ movprfx(z31, z0);
+ __ subhnt(z31.VnS(), z22.VnD(), z27.VnD());
+
+ __ movprfx(z24, z25);
+ __ tbl(z24.VnB(), z29.VnB(), z30.VnB(), z0.VnB());
+
+ __ movprfx(z22, z23);
+ __ tbx(z22.VnB(), z15.VnB(), z19.VnB());
+
+ __ movprfx(z1, z2);
+ __ uabdlb(z1.VnD(), z26.VnS(), z12.VnS());
+
+ __ movprfx(z25, z26);
+ __ uabdlt(z25.VnD(), z29.VnS(), z14.VnS());
+
+ __ movprfx(z3, z4);
+ __ uaddlb(z3.VnD(), z5.VnS(), z2.VnS());
+
+ __ movprfx(z15, z16);
+ __ uaddlt(z15.VnD(), z28.VnS(), z20.VnS());
+
+ __ movprfx(z31, z0);
+ __ uaddwb(z31.VnD(), z8.VnD(), z25.VnS());
+
+ __ movprfx(z17, z18);
+ __ uaddwt(z17.VnD(), z15.VnD(), z2.VnS());
+
+ __ movprfx(z12, z13);
+ __ umulh(z12.VnB(), z12.VnB(), z17.VnB());
+
+ __ movprfx(z12, z13);
+ __ umullb(z12.VnD(), z5.VnS(), z2.VnS());
+
+ __ movprfx(z12, z13);
+ __ umullb(z12.VnS(), z5.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z12, z13);
+ __ umullb(z12.VnD(), z5.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z24, z25);
+ __ umullt(z24.VnD(), z6.VnS(), z6.VnS());
+
+ __ movprfx(z24, z25);
+ __ umullt(z24.VnS(), z6.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z24, z25);
+ __ umullt(z24.VnD(), z6.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z30, z31);
+ __ uqrshrnb(z30.VnB(), z25.VnH(), 1);
+
+ __ movprfx(z3, z4);
+ __ uqrshrnt(z3.VnB(), z25.VnH(), 8);
+
+ __ movprfx(z17, z18);
+ __ uqshrnb(z17.VnB(), z4.VnH(), 1);
+
+ __ movprfx(z28, z29);
+ __ uqshrnt(z28.VnB(), z18.VnH(), 8);
+
+ __ movprfx(z28, z29);
+ __ uqxtnb(z28.VnB(), z4.VnH());
+
+ __ movprfx(z19, z20);
+ __ uqxtnt(z19.VnB(), z7.VnH());
+
+ __ movprfx(z8, z9);
+ __ ushllb(z8.VnH(), z31.VnB(), 0);
+
+ __ movprfx(z3, z4);
+ __ ushllt(z3.VnH(), z21.VnB(), 0);
+
+ __ movprfx(z25, z26);
+ __ usublb(z25.VnD(), z9.VnS(), z17.VnS());
+
+ __ movprfx(z5, z6);
+ __ usublt(z5.VnD(), z11.VnS(), z15.VnS());
+
+ __ movprfx(z10, z11);
+ __ usubwb(z10.VnD(), z13.VnD(), z20.VnS());
+
+ __ movprfx(z15, z16);
+ __ usubwt(z15.VnD(), z8.VnD(), z23.VnS());
+
+ __ movprfx(z20, z21);
+ __ whilege(p0.VnB(), w20, w29);
+
+ __ movprfx(z24, z25);
+ __ whilegt(p11.VnB(), w24, w3);
+
+ __ movprfx(z20, z21);
+ __ whilehi(p2.VnB(), x20, x8);
+
+ __ movprfx(z22, z23);
+ __ whilehs(p4.VnB(), w22, w9);
+
+ __ movprfx(z25, z26);
+ __ whilerw(p7.VnB(), x25, x27);
+
+ __ movprfx(z14, z15);
+ __ whilewr(p8.VnB(), x14, x14);
+ }
+ assm.FinalizeCode();
+
+ CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
+}
+
+TEST(movprfx_negative_predication_sve2) {
+ Assembler assm;
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+ {
+ // We have to use the Assembler directly to generate movprfx, so we need
+ // to manually reserve space for the code we're about to emit.
+ static const size_t kPairCount = 140;
+ CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
+
+ __ movprfx(z25.VnS(), p0.Zeroing(), z26.VnS());
+ __ adclb(z25.VnS(), z17.VnS(), z24.VnS());
+
+ __ movprfx(z0.VnS(), p0.Zeroing(), z1.VnS());
+ __ adclt(z0.VnS(), z2.VnS(), z15.VnS());
+
+ __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
+ __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD());
+
+ __ movprfx(z18.VnD(), p0.Zeroing(), z19.VnD());
+ __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD());
+
+ __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
+ __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD());
+
+ __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
+ __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD());
+
+ __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
+ __ cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90);
+
+ __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
+ __ cdot(z7.VnS(), z4.VnB(), z10.VnB(), 0);
+
+ __ movprfx(z7.VnS(), p0.Zeroing(), z8.VnS());
+ __ cdot(z7.VnS(), z4.VnB(), z0.VnB(), 0, 0);
+
+ __ movprfx(z7.VnD(), p0.Zeroing(), z8.VnD());
+ __ cdot(z7.VnD(), z4.VnH(), z0.VnH(), 0, 0);
+
+ __ movprfx(z19.VnB(), p0.Zeroing(), z20.VnB());
+ __ cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0);
+
+ __ movprfx(z19.VnS(), p0.Zeroing(), z20.VnS());
+ __ cmla(z19.VnS(), z7.VnS(), z2.VnS(), 0, 0);
+
+ __ movprfx(z19.VnH(), p0.Zeroing(), z20.VnH());
+ __ cmla(z19.VnH(), z7.VnH(), z2.VnH(), 0, 0);
+
+ __ movprfx(z10.VnD(), p0.Zeroing(), z11.VnD());
+ __ eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD());
+
+ __ movprfx(z3.VnB(), p0.Zeroing(), z4.VnB());
+ __ eorbt(z3.VnB(), z10.VnB(), z8.VnB());
+
+ __ movprfx(z20.VnB(), p0.Zeroing(), z22.VnB());
+ __ eortb(z20.VnB(), z21.VnB(), z15.VnB());
+
+ __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
+ __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD());
+
+ __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
+ __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD());
+
+ __ movprfx(z22.VnD(), p0.Zeroing(), z23.VnD());
+ __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD());
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD());
+
+ __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
+ __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD());
+
+ __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
+ __ fmlalb(z16.VnS(), z18.VnH(), z29.VnH());
+
+ __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
+ __ fmlalb(z16.VnS(), z18.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
+ __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH());
+
+ __ movprfx(z18.VnS(), p0.Zeroing(), z19.VnS());
+ __ fmlalt(z18.VnS(), z13.VnH(), z5.VnH(), 0);
+
+ __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
+ __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH());
+
+ __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
+ __ fmlslb(z16.VnS(), z10.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
+ __ fmlslt(z3.VnS(), z17.VnH(), z14.VnH());
+
+ __ movprfx(z3.VnS(), p0.Zeroing(), z4.VnS());
+ __ fmlslt(z3.VnS(), z17.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
+ __ mla(z2.VnH(), z0.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
+ __ mla(z2.VnS(), z0.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
+ __ mla(z2.VnD(), z0.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z2.VnH(), p0.Zeroing(), z3.VnH());
+ __ mls(z2.VnH(), z0.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z2.VnS(), p0.Zeroing(), z3.VnS());
+ __ mls(z2.VnS(), z0.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z2.VnD(), p0.Zeroing(), z3.VnD());
+ __ mls(z2.VnD(), z0.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z17.VnD(), p0.Zeroing(), z18.VnD());
+ __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD());
+
+ __ movprfx(z13.VnB(), p0.Zeroing(), z14.VnB());
+ __ saba(z13.VnB(), z2.VnB(), z31.VnB());
+
+ __ movprfx(z13.VnD(), p0.Zeroing(), z14.VnD());
+ __ sabalb(z13.VnD(), z20.VnS(), z26.VnS());
+
+ __ movprfx(z14.VnD(), p0.Zeroing(), z15.VnD());
+ __ sabalt(z14.VnD(), z19.VnS(), z10.VnS());
+
+ __ movprfx(z17.VnS(), p0.Zeroing(), z18.VnS());
+ __ sbclb(z17.VnS(), z10.VnS(), z8.VnS());
+
+ __ movprfx(z20.VnS(), p0.Zeroing(), z21.VnS());
+ __ sbclt(z20.VnS(), z0.VnS(), z13.VnS());
+
+ __ movprfx(z5.VnB(), p0.Zeroing(), z6.VnB());
+ __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB());
+
+ __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
+ __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB());
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlalb(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlalb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
+ __ smlalb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlalt(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlalt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
+ __ smlalt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlslb(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlslb(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
+ __ smlslb(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlslt(z1.VnD(), z3.VnS(), z23.VnS());
+
+ __ movprfx(z1.VnD(), p0.Zeroing(), z2.VnD());
+ __ smlslt(z1.VnD(), z3.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1.VnS(), p0.Zeroing(), z2.VnS());
+ __ smlslt(z1.VnS(), z3.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z20.VnB(), p0.Zeroing(), z21.VnB());
+ __ sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90);
+
+ __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
+ __ sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS());
+
+ __ movprfx(z6.VnD(), p0.Zeroing(), z7.VnD());
+ __ sqdmlalb(z6.VnD(), z19.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z6.VnS(), p0.Zeroing(), z7.VnS());
+ __ sqdmlalb(z6.VnS(), z19.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z23.VnD(), p0.Zeroing(), z24.VnD());
+ __ sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS());
+
+ __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
+ __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS());
+
+ __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
+ __ sqdmlalt(z11.VnD(), z0.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
+ __ sqdmlalt(z11.VnS(), z0.VnH(), z0.VnH(), 0);
+
+ __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
+ __ sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS());
+
+ __ movprfx(z16.VnD(), p0.Zeroing(), z17.VnD());
+ __ sqdmlslb(z16.VnD(), z26.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z16.VnS(), p0.Zeroing(), z17.VnS());
+ __ sqdmlslb(z16.VnS(), z26.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z26.VnD(), p0.Zeroing(), z27.VnD());
+ __ sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS());
+
+ __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
+ __ sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS());
+
+ __ movprfx(z21.VnD(), p0.Zeroing(), z22.VnD());
+ __ sqdmlslt(z21.VnD(), z23.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z21.VnS(), p0.Zeroing(), z22.VnS());
+ __ sqdmlslt(z21.VnS(), z23.VnH(), z0.VnH(), 0);
+
+ __ movprfx(z31.VnB(), p0.Zeroing(), z0.VnB());
+ __ sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0);
+
+ __ movprfx(z31.VnH(), p0.Zeroing(), z0.VnH());
+ __ sqrdcmlah(z31.VnH(), z15.VnH(), z2.VnH(), 0, 0);
+
+ __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
+ __ sqrdcmlah(z31.VnS(), z15.VnS(), z2.VnS(), 0, 0);
+
+ __ movprfx(z27.VnB(), p0.Zeroing(), z28.VnB());
+ __ sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB());
+
+ __ movprfx(z27.VnH(), p0.Zeroing(), z28.VnH());
+ __ sqrdmlah(z27.VnH(), z28.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z27.VnS(), p0.Zeroing(), z28.VnS());
+ __ sqrdmlah(z27.VnS(), z28.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z27.VnD(), p0.Zeroing(), z28.VnD());
+ __ sqrdmlah(z27.VnD(), z28.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z11.VnB(), p0.Zeroing(), z12.VnB());
+ __ sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB());
+
+ __ movprfx(z11.VnH(), p0.Zeroing(), z12.VnH());
+ __ sqrdmlsh(z11.VnH(), z16.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
+ __ sqrdmlsh(z11.VnS(), z16.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
+ __ sqrdmlsh(z11.VnD(), z16.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
+ __ srsra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
+ __ ssra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z23.VnB(), p0.Zeroing(), z24.VnB());
+ __ uaba(z23.VnB(), z22.VnB(), z20.VnB());
+
+ __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
+ __ uabalb(z11.VnD(), z25.VnS(), z12.VnS());
+
+ __ movprfx(z4.VnD(), p0.Zeroing(), z5.VnD());
+ __ uabalt(z4.VnD(), z2.VnS(), z31.VnS());
+
+ __ movprfx(z7.VnB(), p0.Zeroing(), z8.VnB());
+ __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB());
+
+ __ movprfx(z10.VnB(), p0.Zeroing(), z11.VnB());
+ __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB());
+
+ __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
+ __ umlalb(z31.VnD(), z9.VnS(), z21.VnS());
+
+ __ movprfx(z31.VnD(), p0.Zeroing(), z0.VnD());
+ __ umlalb(z31.VnD(), z9.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z31.VnS(), p0.Zeroing(), z0.VnS());
+ __ umlalb(z31.VnS(), z9.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
+ __ umlalt(z11.VnD(), z5.VnS(), z22.VnS());
+
+ __ movprfx(z11.VnD(), p0.Zeroing(), z12.VnD());
+ __ umlalt(z11.VnD(), z5.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z11.VnS(), p0.Zeroing(), z12.VnS());
+ __ umlalt(z11.VnS(), z5.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
+ __ umlslb(z28.VnD(), z13.VnS(), z9.VnS());
+
+ __ movprfx(z28.VnD(), p0.Zeroing(), z29.VnD());
+ __ umlslb(z28.VnD(), z13.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z28.VnS(), p0.Zeroing(), z29.VnS());
+ __ umlslb(z28.VnS(), z13.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
+ __ umlslt(z9.VnD(), z12.VnS(), z30.VnS());
+
+ __ movprfx(z9.VnD(), p0.Zeroing(), z10.VnD());
+ __ umlslt(z9.VnD(), z12.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z9.VnS(), p0.Zeroing(), z10.VnS());
+ __ umlslt(z9.VnS(), z12.VnH(), z0.VnH(), 0);
+
+ __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
+ __ ursra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z0.VnB(), p0.Zeroing(), z1.VnB());
+ __ usra(z0.VnB(), z8.VnB(), 1);
+
+ __ movprfx(z16.VnB(), p0.Zeroing(), z17.VnB());
+ __ xar(z16.VnB(), z16.VnB(), z13.VnB(), 1);
+ }
+ assm.FinalizeCode();
+
+ CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
+}
+
+TEST(movprfx_negative_aliasing_sve2) {
+ Assembler assm;
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+ {
+ // We have to use the Assembler directly to generate movprfx, so we need
+ // to manually reserve space for the code we're about to emit.
+ static const size_t kPairCount = 140;
+ CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
+
+ __ movprfx(z25, z26);
+ __ adclb(z25.VnS(), z17.VnS(), z25.VnS());
+
+ __ movprfx(z0, z1);
+ __ adclt(z0.VnS(), z2.VnS(), z0.VnS());
+
+ __ movprfx(z3, z4);
+ __ addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB());
+
+ __ movprfx(z6, z7);
+ __ bcax(z6.VnD(), z6.VnD(), z12.VnD(), z6.VnD());
+
+ __ movprfx(z18, z19);
+ __ bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z18.VnD());
+
+ __ movprfx(z7, z8);
+ __ bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z7.VnD());
+
+ __ movprfx(z21, z22);
+ __ bsl(z21.VnD(), z21.VnD(), z2.VnD(), z21.VnD());
+
+ __ movprfx(z5, z6);
+ __ cadd(z5.VnB(), z5.VnB(), z5.VnB(), 90);
+
+ __ movprfx(z7, z8);
+ __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0);
+
+ __ movprfx(z7, z8);
+ __ cdot(z7.VnS(), z4.VnB(), z7.VnB(), 0, 0);
+
+ __ movprfx(z7, z8);
+ __ cdot(z7.VnD(), z7.VnH(), z0.VnH(), 0, 0);
+
+ __ movprfx(z19, z20);
+ __ cmla(z19.VnB(), z19.VnB(), z2.VnB(), 0);
+
+ __ movprfx(z19, z20);
+ __ cmla(z19.VnS(), z19.VnS(), z2.VnS(), 0, 0);
+
+ __ movprfx(z1, z20);
+ __ cmla(z1.VnH(), z7.VnH(), z1.VnH(), 0, 0);
+
+ __ movprfx(z10, z11);
+ __ eor3(z10.VnD(), z10.VnD(), z10.VnD(), z23.VnD());
+
+ __ movprfx(z3, z4);
+ __ eorbt(z3.VnB(), z10.VnB(), z3.VnB());
+
+ __ movprfx(z20, z22);
+ __ eortb(z20.VnB(), z21.VnB(), z20.VnB());
+
+ __ movprfx(z14, z15);
+ __ faddp(z14.VnD(), p1.Merging(), z14.VnD(), z14.VnD());
+
+ __ movprfx(z14.VnD(), p4.Merging(), z15.VnD());
+ __ fcvtx(z14.VnS(), p4.Merging(), z14.VnD());
+
+ __ movprfx(z15.VnH(), p0.Merging(), z16.VnH());
+ __ flogb(z15.VnH(), p0.Merging(), z15.VnH());
+
+ __ movprfx(z2, z3);
+ __ fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z2.VnD());
+
+ __ movprfx(z22, z23);
+ __ fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z22.VnD());
+
+ __ movprfx(z1, z2);
+ __ fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z1.VnD());
+
+ __ movprfx(z16, z17);
+ __ fminp(z16.VnD(), p3.Merging(), z16.VnD(), z16.VnD());
+
+ __ movprfx(z16, z17);
+ __ fmlalb(z16.VnS(), z18.VnH(), z16.VnH());
+
+ __ movprfx(z16, z17);
+ __ fmlalb(z16.VnS(), z16.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z18, z19);
+ __ fmlalt(z18.VnS(), z13.VnH(), z18.VnH());
+
+ __ movprfx(z18, z19);
+ __ fmlalt(z18.VnS(), z18.VnH(), z5.VnH(), 0);
+
+ __ movprfx(z16, z17);
+ __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH());
+
+ __ movprfx(z16, z17);
+ __ fmlslb(z16.VnS(), z16.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z3, z4);
+ __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH());
+
+ __ movprfx(z3, z4);
+ __ fmlslt(z3.VnS(), z17.VnH(), z3.VnH(), 0);
+
+ __ movprfx(z2, z3);
+ __ mla(z2.VnH(), z0.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z2, z3);
+ __ mla(z2.VnS(), z0.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z2, z3);
+ __ mla(z2.VnD(), z0.VnD(), z2.VnD(), 0);
+
+ __ movprfx(z2, z3);
+ __ mls(z2.VnH(), z0.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z2, z3);
+ __ mls(z2.VnS(), z0.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z2, z3);
+ __ mls(z2.VnD(), z0.VnD(), z2.VnD(), 0);
+
+ __ movprfx(z17, z18);
+ __ nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z17.VnD());
+
+ __ movprfx(z13, z14);
+ __ saba(z13.VnB(), z2.VnB(), z13.VnB());
+
+ __ movprfx(z13, z14);
+ __ sabalb(z13.VnD(), z13.VnS(), z26.VnS());
+
+ __ movprfx(z14, z15);
+ __ sabalt(z14.VnD(), z14.VnS(), z10.VnS());
+
+ __ movprfx(z19.VnD(), p5.Merging(), z20.VnD());
+ __ sadalp(z19.VnD(), p5.Merging(), z19.VnS());
+
+ __ movprfx(z17, z18);
+ __ sbclb(z17.VnS(), z17.VnS(), z8.VnS());
+
+ __ movprfx(z20, z21);
+ __ sbclt(z20.VnS(), z20.VnS(), z13.VnS());
+
+ __ movprfx(z20.VnB(), p3.Merging(), z21.VnB());
+ __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z20.VnB());
+
+ __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
+ __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z21.VnB());
+
+ __ movprfx(z1.VnB(), p0.Merging(), z2.VnB());
+ __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z1.VnB());
+
+ __ movprfx(z5, z6);
+ __ smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z5.VnB());
+
+ __ movprfx(z27, z28);
+ __ sminp(z27.VnB(), p3.Merging(), z27.VnB(), z27.VnB());
+
+ __ movprfx(z1, z2);
+ __ smlalb(z1.VnD(), z3.VnS(), z1.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlalb(z1.VnD(), z3.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlalb(z1.VnS(), z1.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlalt(z1.VnD(), z1.VnS(), z23.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlalt(z1.VnD(), z3.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlalt(z1.VnS(), z1.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslb(z1.VnD(), z1.VnS(), z23.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlslb(z1.VnD(), z3.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslb(z1.VnS(), z3.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslt(z1.VnD(), z1.VnS(), z23.VnS());
+
+ __ movprfx(z1, z2);
+ __ smlslt(z1.VnD(), z3.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z1, z2);
+ __ smlslt(z1.VnS(), z1.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z29.VnB(), p1.Merging(), z30.VnB());
+ __ sqabs(z29.VnB(), p1.Merging(), z29.VnB());
+
+ __ movprfx(z28.VnB(), p0.Merging(), z29.VnB());
+ __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB());
+
+ __ movprfx(z20, z21);
+ __ sqcadd(z20.VnB(), z20.VnB(), z20.VnB(), 90);
+
+ __ movprfx(z6, z7);
+ __ sqdmlalb(z6.VnD(), z6.VnS(), z25.VnS());
+
+ __ movprfx(z6, z7);
+ __ sqdmlalb(z6.VnD(), z6.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z6, z7);
+ __ sqdmlalb(z6.VnS(), z6.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z23, z24);
+ __ sqdmlalbt(z23.VnD(), z23.VnS(), z26.VnS());
+
+ __ movprfx(z11, z12);
+ __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS());
+
+ __ movprfx(z11, z12);
+ __ sqdmlalt(z11.VnD(), z11.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z1, z12);
+ __ sqdmlalt(z1.VnS(), z0.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z16, z17);
+ __ sqdmlslb(z16.VnD(), z26.VnS(), z16.VnS());
+
+ __ movprfx(z16, z17);
+ __ sqdmlslb(z16.VnD(), z16.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z16, z17);
+ __ sqdmlslb(z16.VnS(), z16.VnH(), z2.VnH(), 0);
+
+ __ movprfx(z26, z27);
+ __ sqdmlslbt(z26.VnD(), z26.VnS(), z4.VnS());
+
+ __ movprfx(z21, z22);
+ __ sqdmlslt(z21.VnD(), z23.VnS(), z21.VnS());
+
+ __ movprfx(z21, z22);
+ __ sqdmlslt(z21.VnD(), z21.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z1, z22);
+ __ sqdmlslt(z1.VnS(), z23.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z21.VnB(), p0.Merging(), z22.VnB());
+ __ sqneg(z21.VnB(), p0.Merging(), z21.VnB());
+
+ __ movprfx(z31, z0);
+ __ sqrdcmlah(z31.VnB(), z15.VnB(), z31.VnB(), 0);
+
+ __ movprfx(z31, z0);
+ __ sqrdcmlah(z31.VnH(), z31.VnH(), z2.VnH(), 0, 0);
+
+ __ movprfx(z31, z0);
+ __ sqrdcmlah(z31.VnS(), z31.VnS(), z2.VnS(), 0, 0);
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnB(), z27.VnB(), z19.VnB());
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnH(), z27.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnS(), z27.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z27, z28);
+ __ sqrdmlah(z27.VnD(), z27.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnB(), z16.VnB(), z11.VnB());
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnH(), z11.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnS(), z11.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z11, z12);
+ __ sqrdmlsh(z11.VnD(), z11.VnD(), z1.VnD(), 0);
+
+ __ movprfx(z31.VnB(), p5.Merging(), z0.VnB());
+ __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z31.VnB());
+
+ __ movprfx(z25.VnB(), p6.Merging(), z26.VnB());
+ __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z25.VnB());
+
+ __ movprfx(z0.VnB(), p5.Merging(), z1.VnB());
+ __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z0.VnB());
+
+ __ movprfx(z7.VnB(), p3.Merging(), z8.VnB());
+ __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z7.VnB());
+
+ __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
+ __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
+
+ __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
+ __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
+
+ __ movprfx(z23.VnB(), p4.Merging(), z24.VnB());
+ __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z23.VnB());
+
+ __ movprfx(z31.VnB(), p7.Merging(), z0.VnB());
+ __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z31.VnB());
+
+ __ movprfx(z16.VnB(), p7.Merging(), z17.VnB());
+ __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z16.VnB());
+
+ __ movprfx(z0, z1);
+ __ srsra(z0.VnB(), z0.VnB(), 1);
+
+ __ movprfx(z0, z1);
+ __ ssra(z0.VnB(), z0.VnB(), 1);
+
+ __ movprfx(z26.VnB(), p2.Merging(), z27.VnB());
+ __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z26.VnB());
+
+ __ movprfx(z23, z24);
+ __ uaba(z23.VnB(), z22.VnB(), z23.VnB());
+
+ __ movprfx(z11, z12);
+ __ uabalb(z11.VnD(), z25.VnS(), z11.VnS());
+
+ __ movprfx(z4, z5);
+ __ uabalt(z4.VnD(), z4.VnS(), z31.VnS());
+
+ __ movprfx(z20.VnD(), p4.Merging(), z21.VnD());
+ __ uadalp(z20.VnD(), p4.Merging(), z20.VnS());
+
+ __ movprfx(z21.VnB(), p2.Merging(), z22.VnB());
+ __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z21.VnB());
+
+ __ movprfx(z1.VnB(), p4.Merging(), z2.VnB());
+ __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z1.VnB());
+
+ __ movprfx(z18.VnB(), p0.Merging(), z19.VnB());
+ __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z18.VnB());
+
+ __ movprfx(z7, z8);
+ __ umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z7.VnB());
+
+ __ movprfx(z10, z11);
+ __ uminp(z10.VnB(), p0.Merging(), z10.VnB(), z10.VnB());
+
+ __ movprfx(z31, z0);
+ __ umlalb(z31.VnD(), z9.VnS(), z31.VnS());
+
+ __ movprfx(z31, z0);
+ __ umlalb(z31.VnD(), z31.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z31, z0);
+ __ umlalb(z31.VnS(), z31.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z11, z12);
+ __ umlalt(z11.VnD(), z11.VnS(), z22.VnS());
+
+ __ movprfx(z11, z12);
+ __ umlalt(z11.VnD(), z11.VnS(), z2.VnS(), 0);
+
+ __ movprfx(z1, z12);
+ __ umlalt(z1.VnS(), z5.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z28, z29);
+ __ umlslb(z28.VnD(), z28.VnS(), z9.VnS());
+
+ __ movprfx(z28, z29);
+ __ umlslb(z28.VnD(), z28.VnS(), z1.VnS(), 0);
+
+ __ movprfx(z28, z29);
+ __ umlslb(z28.VnS(), z28.VnH(), z1.VnH(), 0);
+
+ __ movprfx(z9, z10);
+ __ umlslt(z9.VnD(), z9.VnS(), z30.VnS());
+
+ __ movprfx(z9, z10);
+ __ umlslt(z9.VnD(), z9.VnS(), z0.VnS(), 0);
+
+ __ movprfx(z9, z10);
+ __ umlslt(z9.VnS(), z9.VnH(), z0.VnH(), 0);
+
+ __ movprfx(z24.VnB(), p7.Merging(), z25.VnB());
+ __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z24.VnB());
+
+ __ movprfx(z20.VnB(), p1.Merging(), z21.VnB());
+ __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z20.VnB());
+
+ __ movprfx(z8.VnB(), p5.Merging(), z9.VnB());
+ __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z8.VnB());
+
+ __ movprfx(z29.VnB(), p7.Merging(), z30.VnB());
+ __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z29.VnB());
+
+ __ movprfx(z12.VnB(), p1.Merging(), z13.VnB());
+ __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB());
+
+ __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
+ __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB());
+
+ __ movprfx(z20.VnB(), p0.Merging(), z21.VnB());
+ __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z20.VnB());
+
+ __ movprfx(z25.VnS(), p7.Merging(), z26.VnS());
+ __ urecpe(z25.VnS(), p7.Merging(), z25.VnS());
+
+ __ movprfx(z29.VnB(), p4.Merging(), z30.VnB());
+ __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z29.VnB());
+
+ __ movprfx(z15.VnB(), p2.Merging(), z16.VnB());
+ __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z15.VnB());
+
+ __ movprfx(z27.VnB(), p1.Merging(), z28.VnB());
+ __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z27.VnB());
+
+ __ movprfx(z4.VnS(), p3.Merging(), z5.VnS());
+ __ ursqrte(z4.VnS(), p3.Merging(), z4.VnS());
+
+ __ movprfx(z0, z1);
+ __ ursra(z0.VnB(), z0.VnB(), 1);
+
+ __ movprfx(z25.VnB(), p4.Merging(), z26.VnB());
+ __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z25.VnB());
+
+ __ movprfx(z0, z1);
+ __ usra(z0.VnB(), z0.VnB(), 1);
+
+ __ movprfx(z16, z17);
+ __ xar(z16.VnB(), z16.VnB(), z16.VnB(), 1);
+ }
+ assm.FinalizeCode();
+
+ CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
+}
+
+TEST(movprfx_negative_lane_size_sve2) {
+ Assembler assm;
+ assm.GetCPUFeatures()->Combine(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+ {
+ // We have to use the Assembler directly to generate movprfx, so we need
+ // to manually reserve space for the code we're about to emit.
+ static const size_t kPairCount = 140;
+ CodeBufferCheckScope guard(&assm, kPairCount * 2 * kInstructionSize);
+
+ __ movprfx(z14.VnS(), p4.Merging(), z15.VnS());
+ __ fcvtx(z14.VnS(), p4.Merging(), z0.VnD());
+
+ __ movprfx(z15.VnS(), p0.Merging(), z16.VnS());
+ __ flogb(z15.VnH(), p0.Merging(), z3.VnH());
+
+ __ movprfx(z19.VnB(), p5.Merging(), z20.VnB());
+ __ sadalp(z19.VnD(), p5.Merging(), z9.VnS());
+
+ __ movprfx(z20.VnH(), p3.Merging(), z21.VnH());
+ __ shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB());
+
+ __ movprfx(z21.VnH(), p0.Merging(), z22.VnH());
+ __ shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB());
+
+ __ movprfx(z1.VnS(), p0.Merging(), z2.VnS());
+ __ shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB());
+
+ __ movprfx(z29.VnD(), p1.Merging(), z30.VnD());
+ __ sqabs(z29.VnB(), p1.Merging(), z18.VnB());
+
+ __ movprfx(z28.VnH(), p0.Merging(), z29.VnH());
+ __ sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB());
+
+ __ movprfx(z21.VnH(), p0.Merging(), z22.VnH());
+ __ sqneg(z21.VnB(), p0.Merging(), z17.VnB());
+
+ __ movprfx(z31.VnS(), p5.Merging(), z0.VnS());
+ __ sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB());
+
+ __ movprfx(z25.VnD(), p6.Merging(), z26.VnD());
+ __ sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB());
+
+ __ movprfx(z0.VnH(), p5.Merging(), z1.VnH());
+ __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0);
+
+ __ movprfx(z0.VnS(), p5.Merging(), z1.VnS());
+ __ sqshl(z0.VnB(), p5.Merging(), z0.VnB(), z2.VnB());
+
+ __ movprfx(z7.VnD(), p3.Merging(), z8.VnD());
+ __ sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB());
+
+ __ movprfx(z10.VnH(), p1.Merging(), z11.VnH());
+ __ sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0);
+
+ __ movprfx(z16.VnH(), p7.Merging(), z17.VnH());
+ __ sqsub(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
+
+ __ movprfx(z16.VnS(), p7.Merging(), z17.VnS());
+ __ sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB());
+
+ __ movprfx(z23.VnD(), p4.Merging(), z24.VnD());
+ __ srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB());
+
+ __ movprfx(z31.VnH(), p7.Merging(), z0.VnH());
+ __ srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB());
+
+ __ movprfx(z16.VnH(), p7.Merging(), z17.VnH());
+ __ srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB());
+
+ __ movprfx(z12.VnH(), p0.Merging(), z13.VnH());
+ __ srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1);
+
+ __ movprfx(z26.VnH(), p2.Merging(), z27.VnH());
+ __ suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB());
+
+ __ movprfx(z20.VnB(), p4.Merging(), z21.VnB());
+ __ uadalp(z20.VnD(), p4.Merging(), z5.VnS());
+
+ __ movprfx(z21.VnH(), p2.Merging(), z22.VnH());
+ __ uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB());
+
+ __ movprfx(z1.VnH(), p4.Merging(), z2.VnH());
+ __ uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB());
+
+ __ movprfx(z18.VnH(), p0.Merging(), z19.VnH());
+ __ uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB());
+
+ __ movprfx(z24.VnH(), p7.Merging(), z25.VnH());
+ __ uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB());
+
+ __ movprfx(z20.VnS(), p1.Merging(), z21.VnS());
+ __ uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB());
+
+ __ movprfx(z8.VnS(), p5.Merging(), z9.VnS());
+ __ uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB());
+
+ __ movprfx(z29.VnS(), p7.Merging(), z30.VnS());
+ __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0);
+
+ __ movprfx(z29.VnS(), p7.Merging(), z30.VnS());
+ __ uqshl(z29.VnB(), p7.Merging(), z29.VnB(), z30.VnB());
+
+ __ movprfx(z12.VnS(), p1.Merging(), z13.VnS());
+ __ uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z13.VnB());
+
+ __ movprfx(z20.VnS(), p0.Merging(), z21.VnS());
+ __ uqsub(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
+
+ __ movprfx(z20.VnS(), p0.Merging(), z21.VnS());
+ __ uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB());
+
+ __ movprfx(z25.VnB(), p7.Merging(), z26.VnB());
+ __ urecpe(z25.VnS(), p7.Merging(), z2.VnS());
+
+ __ movprfx(z29.VnD(), p4.Merging(), z30.VnD());
+ __ urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB());
+
+ __ movprfx(z15.VnD(), p2.Merging(), z16.VnD());
+ __ urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB());
+
+ __ movprfx(z27.VnD(), p1.Merging(), z28.VnD());
+ __ urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB());
+
+ __ movprfx(z31.VnD(), p2.Merging(), z0.VnD());
+ __ urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1);
+
+ __ movprfx(z4.VnH(), p3.Merging(), z5.VnH());
+ __ ursqrte(z4.VnS(), p3.Merging(), z3.VnS());
+
+ __ movprfx(z25.VnD(), p4.Merging(), z26.VnD());
+ __ usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB());
+ }
+ assm.FinalizeCode();
+
+ CheckAndMaybeDisassembleMovprfxPairs(assm.GetBuffer(), false);
+}
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc
index e23bd8ce..4ca1a56e 100644
--- a/test/aarch64/test-assembler-aarch64.cc
+++ b/test/aarch64/test-assembler-aarch64.cc
@@ -10664,10 +10664,10 @@ TEST(atomic_memory_swp) {
TEST(ldaprb_ldaprh_ldapr) {
- uint64_t data0[] = {0x1010101010101010, 0};
- uint64_t data1[] = {0x1010101010101010, 0};
- uint64_t data2[] = {0x1010101010101010, 0};
- uint64_t data3[] = {0x1010101010101010, 0};
+ uint64_t data0[] = {0x1010101010101010, 0x1010101010101010};
+ uint64_t data1[] = {0x1010101010101010, 0x1010101010101010};
+ uint64_t data2[] = {0x1010101010101010, 0x1010101010101010};
+ uint64_t data3[] = {0x1010101010101010, 0x1010101010101010};
uint64_t* data0_aligned = AlignUp(data0, kXRegSizeInBytes * 2);
uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
diff --git a/test/aarch64/test-assembler-aarch64.h b/test/aarch64/test-assembler-aarch64.h
index 31e926dd..c3f3264e 100644
--- a/test/aarch64/test-assembler-aarch64.h
+++ b/test/aarch64/test-assembler-aarch64.h
@@ -164,7 +164,7 @@ namespace aarch64 {
{ \
/* We expect the test to use all of the features it requested, plus the */ \
/* features that the instructure code requires. */ \
- CPUFeatures const& expected = \
+ CPUFeatures const& expected_features = \
simulator.GetCPUFeatures()->With(CPUFeatures::kNEON); \
CPUFeatures const& seen = simulator.GetSeenFeatures(); \
/* This gives three broad categories of features that we care about: */ \
@@ -172,13 +172,13 @@ namespace aarch64 {
/* 2. Things seen, but not expected. The simulator catches these. */ \
/* 3. Things expected, but not seen. We check these here. */ \
/* In a valid, passing test, categories 2 and 3 should be empty. */ \
- if (seen != expected) { \
+ if (seen != expected_features) { \
/* The Simulator should have caught anything in category 2 already. */ \
- VIXL_ASSERT(expected.Has(seen)); \
+ VIXL_ASSERT(expected_features.Has(seen)); \
/* Anything left is category 3: things expected, but not seen. This */ \
/* is not necessarily a bug in VIXL itself, but indicates that the */ \
/* test is less strict than it could be. */ \
- CPUFeatures missing = expected.Without(seen); \
+ CPUFeatures missing = expected_features.Without(seen); \
VIXL_ASSERT(missing.Count() > 0); \
std::cout << "Error: expected to see CPUFeatures { " << missing \
<< " }\n"; \
@@ -265,15 +265,15 @@ namespace aarch64 {
if (Test::disassemble()) { \
PrintDisassembler disasm(stdout); \
CodeBuffer* buffer = masm.GetBuffer(); \
- Instruction* start = buffer->GetOffsetAddress<Instruction*>( \
+ Instruction* test_start = buffer->GetOffsetAddress<Instruction*>( \
offset_after_infrastructure_start); \
- Instruction* end = buffer->GetOffsetAddress<Instruction*>( \
+ Instruction* test_end = buffer->GetOffsetAddress<Instruction*>( \
offset_before_infrastructure_end); \
\
if (Test::disassemble_infrastructure()) { \
Instruction* infra_start = buffer->GetStartAddress<Instruction*>(); \
printf("# Infrastructure code (prologue)\n"); \
- disasm.DisassembleBuffer(infra_start, start); \
+ disasm.DisassembleBuffer(infra_start, test_start); \
printf("# Test code\n"); \
} else { \
printf( \
@@ -281,12 +281,12 @@ namespace aarch64 {
"Use --disassemble to see it.\n"); \
} \
\
- disasm.DisassembleBuffer(start, end); \
+ disasm.DisassembleBuffer(test_start, test_end); \
\
if (Test::disassemble_infrastructure()) { \
printf("# Infrastructure code (epilogue)\n"); \
Instruction* infra_end = buffer->GetEndAddress<Instruction*>(); \
- disasm.DisassembleBuffer(end, infra_end); \
+ disasm.DisassembleBuffer(test_end, infra_end); \
} \
}
diff --git a/test/aarch64/test-assembler-fp-aarch64.cc b/test/aarch64/test-assembler-fp-aarch64.cc
index b9a581e7..4ae9ec7e 100644
--- a/test/aarch64/test-assembler-fp-aarch64.cc
+++ b/test/aarch64/test-assembler-fp-aarch64.cc
@@ -905,95 +905,209 @@ TEST(fmadd_fmsub_float) {
TEST(fmadd_fmsub_double_nans) {
// Make sure that NaN propagation works correctly.
- double s1 = RawbitsToDouble(0x7ff5555511111111);
- double s2 = RawbitsToDouble(0x7ff5555522222222);
- double sa = RawbitsToDouble(0x7ff55555aaaaaaaa);
- double q1 = RawbitsToDouble(0x7ffaaaaa11111111);
- double q2 = RawbitsToDouble(0x7ffaaaaa22222222);
- double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
- VIXL_ASSERT(IsSignallingNaN(s1));
- VIXL_ASSERT(IsSignallingNaN(s2));
- VIXL_ASSERT(IsSignallingNaN(sa));
- VIXL_ASSERT(IsQuietNaN(q1));
- VIXL_ASSERT(IsQuietNaN(q2));
- VIXL_ASSERT(IsQuietNaN(qa));
+ double sig1 = RawbitsToDouble(0x7ff5555511111111);
+ double sig2 = RawbitsToDouble(0x7ff5555522222222);
+ double siga = RawbitsToDouble(0x7ff55555aaaaaaaa);
+ double qui1 = RawbitsToDouble(0x7ffaaaaa11111111);
+ double qui2 = RawbitsToDouble(0x7ffaaaaa22222222);
+ double quia = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
+ VIXL_ASSERT(IsSignallingNaN(sig1));
+ VIXL_ASSERT(IsSignallingNaN(sig2));
+ VIXL_ASSERT(IsSignallingNaN(siga));
+ VIXL_ASSERT(IsQuietNaN(qui1));
+ VIXL_ASSERT(IsQuietNaN(qui2));
+ VIXL_ASSERT(IsQuietNaN(quia));
// The input NaNs after passing through ProcessNaN.
- double s1_proc = RawbitsToDouble(0x7ffd555511111111);
- double s2_proc = RawbitsToDouble(0x7ffd555522222222);
- double sa_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa);
- double q1_proc = q1;
- double q2_proc = q2;
- double qa_proc = qa;
- VIXL_ASSERT(IsQuietNaN(s1_proc));
- VIXL_ASSERT(IsQuietNaN(s2_proc));
- VIXL_ASSERT(IsQuietNaN(sa_proc));
- VIXL_ASSERT(IsQuietNaN(q1_proc));
- VIXL_ASSERT(IsQuietNaN(q2_proc));
- VIXL_ASSERT(IsQuietNaN(qa_proc));
+ double sig1_proc = RawbitsToDouble(0x7ffd555511111111);
+ double sig2_proc = RawbitsToDouble(0x7ffd555522222222);
+ double siga_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa);
+ double qui1_proc = qui1;
+ double qui2_proc = qui2;
+ double quia_proc = quia;
+ VIXL_ASSERT(IsQuietNaN(sig1_proc));
+ VIXL_ASSERT(IsQuietNaN(sig2_proc));
+ VIXL_ASSERT(IsQuietNaN(siga_proc));
+ VIXL_ASSERT(IsQuietNaN(qui1_proc));
+ VIXL_ASSERT(IsQuietNaN(qui2_proc));
+ VIXL_ASSERT(IsQuietNaN(quia_proc));
// Negated NaNs as it would be done on ARMv8 hardware.
- double s1_proc_neg = RawbitsToDouble(0xfffd555511111111);
- double sa_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa);
- double q1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111);
- double qa_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa);
- VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
- VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
- VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
- VIXL_ASSERT(IsQuietNaN(qa_proc_neg));
+ double sig1_proc_neg = RawbitsToDouble(0xfffd555511111111);
+ double siga_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa);
+ double qui1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111);
+ double quia_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa);
+ VIXL_ASSERT(IsQuietNaN(sig1_proc_neg));
+ VIXL_ASSERT(IsQuietNaN(siga_proc_neg));
+ VIXL_ASSERT(IsQuietNaN(qui1_proc_neg));
+ VIXL_ASSERT(IsQuietNaN(quia_proc_neg));
// Quiet NaNs are propagated.
- FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
- FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
- FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
- FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
- FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
- FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
- FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
+ FmaddFmsubHelper(qui1,
+ 0,
+ 0,
+ qui1_proc,
+ qui1_proc_neg,
+ qui1_proc_neg,
+ qui1_proc);
+ FmaddFmsubHelper(0, qui2, 0, qui2_proc, qui2_proc, qui2_proc, qui2_proc);
+ FmaddFmsubHelper(0,
+ 0,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
+ FmaddFmsubHelper(qui1,
+ qui2,
+ 0,
+ qui1_proc,
+ qui1_proc_neg,
+ qui1_proc_neg,
+ qui1_proc);
+ FmaddFmsubHelper(0,
+ qui2,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
+ FmaddFmsubHelper(qui1,
+ 0,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
+ FmaddFmsubHelper(qui1,
+ qui2,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
// Signalling NaNs are propagated, and made quiet.
- FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
- FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
+ FmaddFmsubHelper(sig1,
+ 0,
+ 0,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(0, sig2, 0, sig2_proc, sig2_proc, sig2_proc, sig2_proc);
+ FmaddFmsubHelper(0,
+ 0,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ 0,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(0,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ 0,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
// Signalling NaNs take precedence over quiet NaNs.
- FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
- FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
+ FmaddFmsubHelper(sig1,
+ qui2,
+ quia,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(qui1,
+ sig2,
+ quia,
+ sig2_proc,
+ sig2_proc,
+ sig2_proc,
+ sig2_proc);
+ FmaddFmsubHelper(qui1,
+ qui2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ quia,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(qui1,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ qui2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
// A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
FmaddFmsubHelper(0,
kFP64PositiveInfinity,
- qa,
+ quia,
kFP64DefaultNaN,
kFP64DefaultNaN,
kFP64DefaultNaN,
kFP64DefaultNaN);
FmaddFmsubHelper(kFP64PositiveInfinity,
0,
- qa,
+ quia,
kFP64DefaultNaN,
kFP64DefaultNaN,
kFP64DefaultNaN,
kFP64DefaultNaN);
FmaddFmsubHelper(0,
kFP64NegativeInfinity,
- qa,
+ quia,
kFP64DefaultNaN,
kFP64DefaultNaN,
kFP64DefaultNaN,
kFP64DefaultNaN);
FmaddFmsubHelper(kFP64NegativeInfinity,
0,
- qa,
+ quia,
kFP64DefaultNaN,
kFP64DefaultNaN,
kFP64DefaultNaN,
@@ -1003,95 +1117,209 @@ TEST(fmadd_fmsub_double_nans) {
TEST(fmadd_fmsub_float_nans) {
// Make sure that NaN propagation works correctly.
- float s1 = RawbitsToFloat(0x7f951111);
- float s2 = RawbitsToFloat(0x7f952222);
- float sa = RawbitsToFloat(0x7f95aaaa);
- float q1 = RawbitsToFloat(0x7fea1111);
- float q2 = RawbitsToFloat(0x7fea2222);
- float qa = RawbitsToFloat(0x7feaaaaa);
- VIXL_ASSERT(IsSignallingNaN(s1));
- VIXL_ASSERT(IsSignallingNaN(s2));
- VIXL_ASSERT(IsSignallingNaN(sa));
- VIXL_ASSERT(IsQuietNaN(q1));
- VIXL_ASSERT(IsQuietNaN(q2));
- VIXL_ASSERT(IsQuietNaN(qa));
+ float sig1 = RawbitsToFloat(0x7f951111);
+ float sig2 = RawbitsToFloat(0x7f952222);
+ float siga = RawbitsToFloat(0x7f95aaaa);
+ float qui1 = RawbitsToFloat(0x7fea1111);
+ float qui2 = RawbitsToFloat(0x7fea2222);
+ float quia = RawbitsToFloat(0x7feaaaaa);
+ VIXL_ASSERT(IsSignallingNaN(sig1));
+ VIXL_ASSERT(IsSignallingNaN(sig2));
+ VIXL_ASSERT(IsSignallingNaN(siga));
+ VIXL_ASSERT(IsQuietNaN(qui1));
+ VIXL_ASSERT(IsQuietNaN(qui2));
+ VIXL_ASSERT(IsQuietNaN(quia));
// The input NaNs after passing through ProcessNaN.
- float s1_proc = RawbitsToFloat(0x7fd51111);
- float s2_proc = RawbitsToFloat(0x7fd52222);
- float sa_proc = RawbitsToFloat(0x7fd5aaaa);
- float q1_proc = q1;
- float q2_proc = q2;
- float qa_proc = qa;
- VIXL_ASSERT(IsQuietNaN(s1_proc));
- VIXL_ASSERT(IsQuietNaN(s2_proc));
- VIXL_ASSERT(IsQuietNaN(sa_proc));
- VIXL_ASSERT(IsQuietNaN(q1_proc));
- VIXL_ASSERT(IsQuietNaN(q2_proc));
- VIXL_ASSERT(IsQuietNaN(qa_proc));
+ float sig1_proc = RawbitsToFloat(0x7fd51111);
+ float sig2_proc = RawbitsToFloat(0x7fd52222);
+ float siga_proc = RawbitsToFloat(0x7fd5aaaa);
+ float qui1_proc = qui1;
+ float qui2_proc = qui2;
+ float quia_proc = quia;
+ VIXL_ASSERT(IsQuietNaN(sig1_proc));
+ VIXL_ASSERT(IsQuietNaN(sig2_proc));
+ VIXL_ASSERT(IsQuietNaN(siga_proc));
+ VIXL_ASSERT(IsQuietNaN(qui1_proc));
+ VIXL_ASSERT(IsQuietNaN(qui2_proc));
+ VIXL_ASSERT(IsQuietNaN(quia_proc));
// Negated NaNs as it would be done on ARMv8 hardware.
- float s1_proc_neg = RawbitsToFloat(0xffd51111);
- float sa_proc_neg = RawbitsToFloat(0xffd5aaaa);
- float q1_proc_neg = RawbitsToFloat(0xffea1111);
- float qa_proc_neg = RawbitsToFloat(0xffeaaaaa);
- VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
- VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
- VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
- VIXL_ASSERT(IsQuietNaN(qa_proc_neg));
+ float sig1_proc_neg = RawbitsToFloat(0xffd51111);
+ float siga_proc_neg = RawbitsToFloat(0xffd5aaaa);
+ float qui1_proc_neg = RawbitsToFloat(0xffea1111);
+ float quia_proc_neg = RawbitsToFloat(0xffeaaaaa);
+ VIXL_ASSERT(IsQuietNaN(sig1_proc_neg));
+ VIXL_ASSERT(IsQuietNaN(siga_proc_neg));
+ VIXL_ASSERT(IsQuietNaN(qui1_proc_neg));
+ VIXL_ASSERT(IsQuietNaN(quia_proc_neg));
// Quiet NaNs are propagated.
- FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
- FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
- FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
- FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
- FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
- FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
- FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
+ FmaddFmsubHelper(qui1,
+ 0,
+ 0,
+ qui1_proc,
+ qui1_proc_neg,
+ qui1_proc_neg,
+ qui1_proc);
+ FmaddFmsubHelper(0, qui2, 0, qui2_proc, qui2_proc, qui2_proc, qui2_proc);
+ FmaddFmsubHelper(0,
+ 0,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
+ FmaddFmsubHelper(qui1,
+ qui2,
+ 0,
+ qui1_proc,
+ qui1_proc_neg,
+ qui1_proc_neg,
+ qui1_proc);
+ FmaddFmsubHelper(0,
+ qui2,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
+ FmaddFmsubHelper(qui1,
+ 0,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
+ FmaddFmsubHelper(qui1,
+ qui2,
+ quia,
+ quia_proc,
+ quia_proc,
+ quia_proc_neg,
+ quia_proc_neg);
// Signalling NaNs are propagated, and made quiet.
- FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
- FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
+ FmaddFmsubHelper(sig1,
+ 0,
+ 0,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(0, sig2, 0, sig2_proc, sig2_proc, sig2_proc, sig2_proc);
+ FmaddFmsubHelper(0,
+ 0,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ 0,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(0,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ 0,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
// Signalling NaNs take precedence over quiet NaNs.
- FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
- FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
- FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
- FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
+ FmaddFmsubHelper(sig1,
+ qui2,
+ quia,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(qui1,
+ sig2,
+ quia,
+ sig2_proc,
+ sig2_proc,
+ sig2_proc,
+ sig2_proc);
+ FmaddFmsubHelper(qui1,
+ qui2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ quia,
+ sig1_proc,
+ sig1_proc_neg,
+ sig1_proc_neg,
+ sig1_proc);
+ FmaddFmsubHelper(qui1,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ qui2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
+ FmaddFmsubHelper(sig1,
+ sig2,
+ siga,
+ siga_proc,
+ siga_proc,
+ siga_proc_neg,
+ siga_proc_neg);
// A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
FmaddFmsubHelper(0,
kFP32PositiveInfinity,
- qa,
+ quia,
kFP32DefaultNaN,
kFP32DefaultNaN,
kFP32DefaultNaN,
kFP32DefaultNaN);
FmaddFmsubHelper(kFP32PositiveInfinity,
0,
- qa,
+ quia,
kFP32DefaultNaN,
kFP32DefaultNaN,
kFP32DefaultNaN,
kFP32DefaultNaN);
FmaddFmsubHelper(0,
kFP32NegativeInfinity,
- qa,
+ quia,
kFP32DefaultNaN,
kFP32DefaultNaN,
kFP32DefaultNaN,
kFP32DefaultNaN);
FmaddFmsubHelper(kFP32NegativeInfinity,
0,
- qa,
+ quia,
kFP32DefaultNaN,
kFP32DefaultNaN,
kFP32DefaultNaN,
diff --git a/test/aarch64/test-assembler-neon-aarch64.cc b/test/aarch64/test-assembler-neon-aarch64.cc
index d2e04052..b1cf5163 100644
--- a/test/aarch64/test-assembler-neon-aarch64.cc
+++ b/test/aarch64/test-assembler-neon-aarch64.cc
@@ -3896,6 +3896,30 @@ TEST(neon_fcvtn) {
}
}
+TEST(neon_fcvtn_fcvtxn_regression_test) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
+
+ START();
+ __ Movi(v0.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
+ __ Movi(v1.V2D(), 0x3f800000bf800000, 0x40000000c0000000);
+ __ Movi(v2.V2D(), 0x3ff0000000000000, 0xbff0000000000000);
+
+ __ Fcvtn(v16.V2S(), v0.V2D());
+ __ Fcvtn(v17.V4H(), v1.V4S());
+ __ Fcvtn(v0.V2S(), v0.V2D());
+ __ Fcvtn(v1.V4H(), v1.V4S());
+ __ Fcvtxn(v2.V2S(), v2.V2D());
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q0);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x3c00bc004000c000, q1);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x3f800000bf800000, q2);
+ }
+}
TEST(neon_fcvtxn) {
SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
@@ -10818,6 +10842,1371 @@ TEST(neon_tbl) {
}
}
+TEST(neon_usdot) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON,
+ CPUFeatures::kDotProduct,
+ CPUFeatures::kI8MM);
+
+ START();
+ __ Movi(v0.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);
+ __ Movi(v1.V2D(), 0x7f7f7f7f7f7f7f7f, 0x7f7f7f7f7f7f7f7f);
+ __ Movi(v2.V2D(), 0x8080808080808080, 0x8080808080808080);
+ __ Movi(v3.V2D(), 0, 0);
+ __ Mov(q4, q3);
+ __ Mov(q5, q3);
+ __ Mov(q6, q3);
+ __ Mov(q7, q3);
+ __ Mov(q8, q3);
+ __ Mov(q9, q3);
+ __ Mov(q10, q3);
+ __ Mov(q11, q3);
+
+ // Test Usdot against Udot/Sdot over the range of inputs where they should be
+ // equal.
+ __ Usdot(v3.V2S(), v0.V8B(), v1.V8B());
+ __ Udot(v4.V2S(), v0.V8B(), v1.V8B());
+ __ Cmeq(v3.V4S(), v3.V4S(), v4.V4S());
+ __ Usdot(v5.V4S(), v0.V16B(), v1.V16B());
+ __ Udot(v6.V4S(), v0.V16B(), v1.V16B());
+ __ Cmeq(v5.V4S(), v5.V4S(), v6.V4S());
+
+ __ Usdot(v7.V2S(), v1.V8B(), v2.V8B());
+ __ Sdot(v8.V2S(), v1.V8B(), v2.V8B());
+ __ Cmeq(v7.V4S(), v7.V4S(), v8.V4S());
+ __ Usdot(v9.V4S(), v1.V16B(), v2.V16B());
+ __ Sdot(v10.V4S(), v1.V16B(), v2.V16B());
+ __ Cmeq(v9.V4S(), v9.V4S(), v10.V4S());
+
+ // Construct values which, when interpreted correctly as signed/unsigned,
+ // should give a zero result for dot product.
+ __ Mov(w0, 0x8101ff40); // [-127, 1, -1, 64] as signed bytes.
+ __ Mov(w1, 0x02fe8002); // [2, 254, 128, 2] as unsigned bytes.
+ __ Dup(v0.V4S(), w0);
+ __ Dup(v1.V4S(), w1);
+ __ Usdot(v11.V4S(), v1.V16B(), v0.V16B());
+
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_128(-1, -1, q3);
+ ASSERT_EQUAL_128(-1, -1, q5);
+ ASSERT_EQUAL_128(-1, -1, q7);
+ ASSERT_EQUAL_128(-1, -1, q9);
+ ASSERT_EQUAL_128(0, 0, q11);
+ }
+}
+
+TEST(neon_usdot_element) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kI8MM);
+
+ START();
+ __ Movi(v0.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+ __ Movi(v1.V2D(), 0x4242424242424242, 0x5555aaaaaaaa5555);
+
+ // Test element Usdot against vector variant.
+ __ Dup(v2.V4S(), v1.V4S(), 0);
+ __ Dup(v3.V4S(), v1.V4S(), 1);
+ __ Dup(v4.V4S(), v1.V4S(), 3);
+
+ __ Mov(q10, q1);
+ __ Usdot(v10.V2S(), v0.V8B(), v2.V8B());
+ __ Mov(q11, q1);
+ __ Usdot(v11.V2S(), v0.V8B(), v1.S4B(), 0);
+ __ Cmeq(v11.V4S(), v11.V4S(), v10.V4S());
+
+ __ Mov(q12, q1);
+ __ Usdot(v12.V4S(), v0.V16B(), v3.V16B());
+ __ Mov(q13, q1);
+ __ Usdot(v13.V4S(), v0.V16B(), v1.S4B(), 1);
+ __ Cmeq(v13.V4S(), v13.V4S(), v12.V4S());
+
+ __ Mov(q14, q1);
+ __ Usdot(v14.V4S(), v4.V16B(), v0.V16B());
+ __ Mov(q15, q1);
+ __ Sudot(v15.V4S(), v0.V16B(), v1.S4B(), 3);
+ __ Cmeq(v15.V4S(), v15.V4S(), v14.V4S());
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_128(-1, -1, q11);
+ ASSERT_EQUAL_128(-1, -1, q13);
+ ASSERT_EQUAL_128(-1, -1, q15);
+ }
+}
+
+TEST(zero_high_b) {
+ SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kNEON, CPUFeatures::kRDM);
+ START();
+
+ __ Mov(x0, 0x55aa42ffaa42ff55);
+ __ Mov(x1, 4);
+ __ Movi(q30.V16B(), 0);
+
+ // Iterate over the SISD instructions using different input values on each
+ // loop.
+ Label loop;
+ __ Bind(&loop);
+
+ __ Dup(q0.V16B(), w0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q1.V16B(), w0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q2.V16B(), w0);
+ __ Ror(x0, x0, 8);
+
+ {
+ ExactAssemblyScope scope(&masm, 81 * kInstructionSize);
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e010409); // mov b9, v0.b[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e207809); // sqabs b9, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e200c29); // sqadd b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e207809); // sqneg b9, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e008429); // sqrdmlah b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e008c29); // sqrdmlsh b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e205c29); // sqrshl b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f089c09); // sqrshrn b9, h0, #8
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f088c09); // sqrshrun b9, h0, #8
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e204c29); // sqshl b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f087409); // sqshl b9, b0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f086409); // sqshlu b9, b0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f089409); // sqshrn b9, h0, #8
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f088409); // sqshrun b9, h0, #8
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e202c29); // sqsub b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e214809); // sqxtn b9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e212809); // sqxtun b9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e203809); // suqadd b9, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e200c29); // uqadd b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e205c29); // uqrshl b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f089c09); // uqrshrn b9, h0, #8
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e204c29); // uqshl b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f087409); // uqshl b9, b0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f089409); // uqshrn b9, h0, #8
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e202c29); // uqsub b9, b1, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e214809); // uqxtn b9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e203809); // usqadd b9, b0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+ }
+ __ Sub(x1, x1, 1);
+ __ Cbnz(x1, &loop);
+
+ __ Ins(q30.V16B(), 0, wzr);
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ ASSERT_EQUAL_128(0, 0, q30);
+ }
+}
+
+TEST(zero_high_h) {
+ SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kFP,
+ CPUFeatures::kNEONHalf,
+ CPUFeatures::kRDM);
+ START();
+
+ __ Mov(x0, 0x55aa42ffaa42ff55);
+ __ Mov(x1, 4);
+ __ Movi(q30.V16B(), 0);
+
+ // Iterate over the SISD instructions using different input values on each
+ // loop.
+ Label loop;
+ __ Bind(&loop);
+
+ __ Dup(q0.V8H(), w0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q1.V8H(), w0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q2.V8H(), w0);
+ __ Ror(x0, x0, 8);
+
+ {
+ ExactAssemblyScope scope(&masm, 225 * kInstructionSize);
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e020409); // mov h9, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ec01429); // fabd h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e402c29); // facge h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ec02c29); // facgt h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e30d809); // faddp h9, v0.2h
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef8d809); // fcmeq h9, h0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e402429); // fcmeq h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ef8c809); // fcmge h9, h0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e402429); // fcmge h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef8c809); // fcmgt h9, h0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ec02429); // fcmgt h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ef8d809); // fcmle h9, h0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef8e809); // fcmlt h9, h0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e79c809); // fcvtas h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e79c809); // fcvtau h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e79b809); // fcvtms h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e79b809); // fcvtmu h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e79a809); // fcvtns h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e79a809); // fcvtnu h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef9a809); // fcvtps h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ef9a809); // fcvtpu h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef9b809); // fcvtzs h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f10fc09); // fcvtzs h9, h0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ef9b809); // fcvtzu h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f10fc09); // fcvtzu h9, h0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e30c809); // fmaxnmp h9, v0.2h
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e30f809); // fmaxp h9, v0.2h
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5eb0c809); // fminnmp h9, v0.2h
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5eb0f809); // fminp h9, v0.2h
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f001029); // fmla h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f005029); // fmls h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f009029); // fmul h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f009029); // fmulx h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e401c29); // fmulx h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef9d809); // frecpe h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e403c29); // frecps h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef9f809); // frecpx h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ef9d809); // frsqrte h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ec03c29); // frsqrts h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e79d809); // scvtf h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f10e409); // scvtf h9, h0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e607809); // sqabs h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e600c29); // sqadd h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f40c029); // sqdmulh h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e60b429); // sqdmulh h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e607809); // sqneg h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f40d029); // sqrdmlah h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e408429); // sqrdmlah h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f40f029); // sqrdmlsh h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e408c29); // sqrdmlsh h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f40d029); // sqrdmulh h9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e60b429); // sqrdmulh h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e605c29); // sqrshl h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f109c09); // sqrshrn h9, s0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f108c09); // sqrshrun h9, s0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e604c29); // sqshl h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f107409); // sqshl h9, h0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f106409); // sqshlu h9, h0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f109409); // sqshrn h9, s0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f108409); // sqshrun h9, s0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e602c29); // sqsub h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e614809); // sqxtn h9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e612809); // sqxtun h9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e603809); // suqadd h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e79d809); // ucvtf h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f10e409); // ucvtf h9, h0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e600c29); // uqadd h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e605c29); // uqrshl h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f109c09); // uqrshrn h9, s0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e604c29); // uqshl h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f107409); // uqshl h9, h0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f109409); // uqshrn h9, s0, #16
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e602c29); // uqsub h9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e614809); // uqxtn h9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e603809); // usqadd h9, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+ }
+ __ Sub(x1, x1, 1);
+ __ Cbnz(x1, &loop);
+
+ __ Ins(q30.V8H(), 0, wzr);
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ ASSERT_EQUAL_128(0, 0, q30);
+ }
+}
+
+TEST(zero_high_s) {
+ SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kFP,
+ CPUFeatures::kRDM);
+ START();
+
+ __ Mov(x0, 0x55aa42ffaa42ff55);
+ __ Mov(x1, 4);
+ __ Movi(q30.V16B(), 0);
+
+ // Iterate over the SISD instructions using different input values on each
+ // loop.
+ Label loop;
+ __ Bind(&loop);
+
+ __ Dup(q0.V4S(), w0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q1.V4S(), w0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q2.V4S(), w0);
+ __ Ror(x0, x0, 8);
+
+ {
+ ExactAssemblyScope scope(&masm, 246 * kInstructionSize);
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e040409); // mov s9, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea0d429); // fabd s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e20ec29); // facge s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea0ec29); // facgt s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e30d809); // faddp s9, v0.2s
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea0d809); // fcmeq s9, s0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e20e429); // fcmeq s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea0c809); // fcmge s9, s0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e20e429); // fcmge s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea0c809); // fcmgt s9, s0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea0e429); // fcmgt s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea0d809); // fcmle s9, s0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea0e809); // fcmlt s9, s0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e21c809); // fcvtas s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e21c809); // fcvtau s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e21b809); // fcvtms s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e21b809); // fcvtmu s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e21a809); // fcvtns s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e21a809); // fcvtnu s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea1a809); // fcvtps s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea1a809); // fcvtpu s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e616809); // fcvtxn s9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea1b809); // fcvtzs s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f20fc09); // fcvtzs s9, s0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea1b809); // fcvtzu s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f20fc09); // fcvtzu s9, s0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e30c809); // fmaxnmp s9, v0.2s
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e30f809); // fmaxp s9, v0.2s
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7eb0c809); // fminnmp s9, v0.2s
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7eb0f809); // fminp s9, v0.2s
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f801029); // fmla s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f805029); // fmls s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f809029); // fmul s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f809029); // fmulx s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e20dc29); // fmulx s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea1d809); // frecpe s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e20fc29); // frecps s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea1f809); // frecpx s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea1d809); // frsqrte s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea0fc29); // frsqrts s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e21d809); // scvtf s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f20e409); // scvtf s9, s0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea07809); // sqabs s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea00c29); // sqadd s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e609029); // sqdmlal s9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f403029); // sqdmlal s9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e60b029); // sqdmlsl s9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f407029); // sqdmlsl s9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f80c029); // sqdmulh s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea0b429); // sqdmulh s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e60d029); // sqdmull s9, h1, h0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f40b029); // sqdmull s9, h1, v0.h[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea07809); // sqneg s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f80d029); // sqrdmlah s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e808429); // sqrdmlah s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f80f029); // sqrdmlsh s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e808c29); // sqrdmlsh s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f80d029); // sqrdmulh s9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea0b429); // sqrdmulh s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea05c29); // sqrshl s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f209c09); // sqrshrn s9, d0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f208c09); // sqrshrun s9, d0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea04c29); // sqshl s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f207409); // sqshl s9, s0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f206409); // sqshlu s9, s0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f209409); // sqshrn s9, d0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f208409); // sqshrun s9, d0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea02c29); // sqsub s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea14809); // sqxtn s9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea12809); // sqxtun s9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea03809); // suqadd s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e21d809); // ucvtf s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f20e409); // ucvtf s9, s0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea00c29); // uqadd s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea05c29); // uqrshl s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f209c09); // uqrshrn s9, d0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea04c29); // uqshl s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f207409); // uqshl s9, s0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f209409); // uqshrn s9, d0, #32
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea02c29); // uqsub s9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea14809); // uqxtn s9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ea03809); // usqadd s9, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+ }
+ __ Sub(x1, x1, 1);
+ __ Cbnz(x1, &loop);
+
+ __ Ins(q30.V4S(), 0, wzr);
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ ASSERT_EQUAL_128(0, 0, q30);
+ }
+}
+
+TEST(zero_high_d) {
+ SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kFP,
+ CPUFeatures::kRDM);
+ START();
+
+ __ Mov(x0, 0x55aa42ffaa42ff55);
+ __ Mov(x1, 4);
+ __ Movi(q30.V16B(), 0);
+
+ // Iterate over the SISD instructions using different input values on each
+ // loop.
+ Label loop;
+ __ Bind(&loop);
+
+ __ Dup(q0.V2D(), x0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q1.V2D(), x0);
+ __ Ror(x0, x0, 8);
+ __ Dup(q2.V2D(), x0);
+ __ Ror(x0, x0, 8);
+
+ {
+ ExactAssemblyScope scope(&masm, 291 * kInstructionSize);
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee0b809); // abs d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee08429); // add d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ef1b809); // addp d9, v0.2d
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee09809); // cmeq d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee08c29); // cmeq d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee08809); // cmge d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee03c29); // cmge d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee08809); // cmgt d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee03429); // cmgt d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee03429); // cmhi d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee03c29); // cmhs d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee09809); // cmle d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee0a809); // cmlt d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee08c29); // cmtst d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e080409); // mov d9, v0.d[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee0d429); // fabd d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e60ec29); // facge d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee0ec29); // facgt d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e70d809); // faddp d9, v0.2d
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee0d809); // fcmeq d9, d0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e60e429); // fcmeq d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee0c809); // fcmge d9, d0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e60e429); // fcmge d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee0c809); // fcmgt d9, d0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee0e429); // fcmgt d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee0d809); // fcmle d9, d0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee0e809); // fcmlt d9, d0, #0.0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e61c809); // fcvtas d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e61c809); // fcvtau d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e61b809); // fcvtms d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e61b809); // fcvtmu d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e61a809); // fcvtns d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e61a809); // fcvtnu d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee1a809); // fcvtps d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee1a809); // fcvtpu d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee1b809); // fcvtzs d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f40fc09); // fcvtzs d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee1b809); // fcvtzu d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f40fc09); // fcvtzu d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e70c809); // fmaxnmp d9, v0.2d
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e70f809); // fmaxp d9, v0.2d
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ef0c809); // fminnmp d9, v0.2d
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ef0f809); // fminp d9, v0.2d
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5fc01029); // fmla d9, d1, v0.d[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5fc05029); // fmls d9, d1, v0.d[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5fc09029); // fmul d9, d1, v0.d[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7fc09029); // fmulx d9, d1, v0.d[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e60dc29); // fmulx d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee1d809); // frecpe d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e60fc29); // frecps d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee1f809); // frecpx d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee1d809); // frsqrte d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee0fc29); // frsqrts d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee0b809); // neg d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5e61d809); // scvtf d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f40e409); // scvtf d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f405409); // shl d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f405409); // sli d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee07809); // sqabs d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee00c29); // sqadd d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea09029); // sqdmlal d9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f803029); // sqdmlal d9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea0b029); // sqdmlsl d9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f807029); // sqdmlsl d9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ea0d029); // sqdmull d9, s1, s0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f80b029); // sqdmull d9, s1, v0.s[0]
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee07809); // sqneg d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ec08429); // sqrdmlah d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ec08c29); // sqrdmlsh d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee05c29); // sqrshl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee04c29); // sqshl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f407409); // sqshl d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f406409); // sqshlu d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee02c29); // sqsub d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f404409); // sri d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee05429); // srshl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f402409); // srshr d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f403409); // srsra d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee04429); // sshl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f400409); // sshr d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5f401409); // ssra d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee08429); // sub d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x5ee03809); // suqadd d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7e61d809); // ucvtf d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f40e409); // ucvtf d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee00c29); // uqadd d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee05c29); // uqrshl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee04c29); // uqshl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f407409); // uqshl d9, d0, #0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee02c29); // uqsub d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee05429); // urshl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f402409); // urshr d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f403409); // ursra d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee04429); // ushl d9, d1, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f400409); // ushr d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7ee03809); // usqadd d9, d0
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+
+ __ movi(q9.V16B(), 0x55);
+ __ dci(0x7f401409); // usra d9, d0, #64
+ __ orr(q30.V16B(), q30.V16B(), q9.V16B());
+ }
+ __ Sub(x1, x1, 1);
+ __ Cbnz(x1, &loop);
+
+ __ Ins(q30.V2D(), 0, xzr);
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ ASSERT_EQUAL_128(0, 0, q30);
+ }
+}
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc
index 61583143..053d5c82 100644
--- a/test/aarch64/test-assembler-sve-aarch64.cc
+++ b/test/aarch64/test-assembler-sve-aarch64.cc
@@ -44,76 +44,11 @@
#include "aarch64/simulator-aarch64.h"
#include "test-assembler-aarch64.h"
+#define TEST_SVE(name) TEST_SVE_INNER("ASM", name)
+
namespace vixl {
namespace aarch64 {
-Test* MakeSVETest(int vl, const char* name, Test::TestFunctionWithConfig* fn) {
- // We never free this memory, but we need it to live for as long as the static
- // linked list of tests, and this is the easiest way to do it.
- Test* test = new Test(name, fn);
- test->set_sve_vl_in_bits(vl);
- return test;
-}
-
-// The TEST_SVE macro works just like the usual TEST macro, but the resulting
-// function receives a `const Test& config` argument, to allow it to query the
-// vector length.
-#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
-// On the Simulator, run SVE tests with several vector lengths, including the
-// extreme values and an intermediate value that isn't a power of two.
-
-#define TEST_SVE(name) \
- void Test##name(Test* config); \
- Test* test_##name##_list[] = \
- {MakeSVETest(128, "AARCH64_ASM_" #name "_vl128", &Test##name), \
- MakeSVETest(384, "AARCH64_ASM_" #name "_vl384", &Test##name), \
- MakeSVETest(2048, "AARCH64_ASM_" #name "_vl2048", &Test##name)}; \
- void Test##name(Test* config)
-
-#define SVE_SETUP_WITH_FEATURES(...) \
- SETUP_WITH_FEATURES(__VA_ARGS__); \
- simulator.SetVectorLengthInBits(config->sve_vl_in_bits())
-
-#else
-// Otherwise, just use whatever the hardware provides.
-static const int kSVEVectorLengthInBits =
- CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
- ? CPU::ReadSVEVectorLengthInBits()
- : kZRegMinSize;
-
-#define TEST_SVE(name) \
- void Test##name(Test* config); \
- Test* test_##name##_vlauto = MakeSVETest(kSVEVectorLengthInBits, \
- "AARCH64_ASM_" #name "_vlauto", \
- &Test##name); \
- void Test##name(Test* config)
-
-#define SVE_SETUP_WITH_FEATURES(...) \
- SETUP_WITH_FEATURES(__VA_ARGS__); \
- USE(config)
-
-#endif
-
-// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
-// is optimised for call-site clarity, not generated code quality, so it doesn't
-// exist in the MacroAssembler itself.
-//
-// Usage:
-//
-// int values[] = { 42, 43, 44 };
-// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 }
-//
-// The rightmost (highest-indexed) array element maps to the lowest-numbered
-// lane.
-template <typename T, size_t N>
-void InsrHelper(MacroAssembler* masm,
- const ZRegister& zdn,
- const T (&values)[N]) {
- for (size_t i = 0; i < N; i++) {
- masm->Insr(zdn, values[i]);
- }
-}
-
// Conveniently initialise P registers with scalar bit patterns. The destination
// lane size is ignored. This is optimised for call-site clarity, not generated
// code quality.
@@ -3448,8 +3383,10 @@ static void PtrueHelper(Test* config,
typedef void (
MacroAssembler::*AssemblePtrueFn)(const PRegisterWithLaneSize& pd,
int pattern);
- AssemblePtrueFn assemble =
- (s == SetFlags) ? &MacroAssembler::ptrues : &MacroAssembler::ptrue;
+ AssemblePtrueFn assemble = &MacroAssembler::ptrue;
+ if (s == SetFlags) {
+ assemble = &MacroAssembler::ptrues;
+ }
ExactAssemblyScope guard(&masm, 12 * kInstructionSize);
__ msr(NZCV, x20);
@@ -5537,6 +5474,9 @@ TEST_SVE(sve_addpl) {
}
TEST_SVE(sve_calculate_sve_address) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
// Shadow the `MacroAssembler` type so that the test macros work without
// modification.
typedef CalculateSVEAddressMacroAssembler MacroAssembler;
@@ -5646,6 +5586,7 @@ TEST_SVE(sve_calculate_sve_address) {
ASSERT_EQUAL_64(0xabcd404400000000 - 48, x28);
ASSERT_EQUAL_64(0xabcd505500000000 - (48 << 4), x29);
}
+#pragma GCC diagnostic pop
}
TEST_SVE(sve_permute_vector_unpredicated) {
@@ -10549,18 +10490,22 @@ TEST_SVE(sve_ld1rq) {
// Check that all segments match by rotating the vector by one segment,
// eoring, and orring across the vector.
- __ Ext(z4.VnB(), z0.VnB(), z0.VnB(), 16);
+ __ Mov(z4, z0);
+ __ Ext(z4.VnB(), z4.VnB(), z4.VnB(), 16);
__ Eor(z4.VnB(), z4.VnB(), z0.VnB());
__ Orv(b4, p0, z4.VnB());
- __ Ext(z5.VnB(), z1.VnB(), z1.VnB(), 16);
+ __ Mov(z5, z1);
+ __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 16);
__ Eor(z5.VnB(), z5.VnB(), z1.VnB());
__ Orv(b5, p0, z5.VnB());
__ Orr(z4, z4, z5);
- __ Ext(z5.VnB(), z2.VnB(), z2.VnB(), 16);
+ __ Mov(z5, z2);
+ __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 16);
__ Eor(z5.VnB(), z5.VnB(), z2.VnB());
__ Orv(b5, p0, z5.VnB());
__ Orr(z4, z4, z5);
- __ Ext(z5.VnB(), z3.VnB(), z3.VnB(), 16);
+ __ Mov(z5, z3);
+ __ Ext(z5.VnB(), z5.VnB(), z5.VnB(), 16);
__ Eor(z5.VnB(), z5.VnB(), z3.VnB());
__ Orv(b5, p0, z5.VnB());
__ Orr(z4, z4, z5);
@@ -11681,19 +11626,19 @@ static void SdotUdotHelper(Test* config,
const ZRegister& za,
const ZRegister& zn,
const ZRegister& zm,
- bool is_signed,
- int index) {
- if (is_signed) {
- if (index < 0) {
+ bool is_signed_fn,
+ int index_fn) {
+ if (is_signed_fn) {
+ if (index_fn < 0) {
__ Sdot(zd, za, zn, zm);
} else {
- __ Sdot(zd, za, zn, zm, index);
+ __ Sdot(zd, za, zn, zm, index_fn);
}
} else {
- if (index < 0) {
+ if (index_fn < 0) {
__ Udot(zd, za, zn, zm);
} else {
- __ Udot(zd, za, zn, zm, index);
+ __ Udot(zd, za, zn, zm, index_fn);
}
}
};
@@ -14573,7 +14518,8 @@ TEST_SVE(sve_fcadd) {
__ Sel(z2.VnH(), p3, z1.VnH(), z30.VnH()); // 5i + 0
__ Sel(z3.VnH(), p2, z1.VnH(), z30.VnH()); // 0i + 5
__ Sel(z7.VnH(), p3, z7.VnH(), z0.VnH()); // Ai + 10
- __ Ext(z8.VnB(), z7.VnB(), z7.VnB(), 2);
+ __ Mov(z8, z7);
+ __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 2);
__ Sel(z8.VnH(), p2, z8.VnH(), z30.VnH()); // 0i + A
// (10i + 10) + rotate(5i + 0, 90)
@@ -14615,7 +14561,8 @@ TEST_SVE(sve_fcadd) {
__ Sel(z2.VnS(), p3, z1.VnS(), z30.VnS());
__ Sel(z29.VnS(), p2, z1.VnS(), z30.VnS());
__ Sel(z11.VnS(), p3, z11.VnS(), z0.VnS());
- __ Ext(z12.VnB(), z11.VnB(), z11.VnB(), 4);
+ __ Mov(z12, z11);
+ __ Ext(z12.VnB(), z12.VnB(), z12.VnB(), 4);
__ Sel(z12.VnS(), p2, z12.VnS(), z30.VnS());
__ Fcadd(z8.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90);
__ Fcadd(z8.VnS(), p0.Merging(), z8.VnS(), z29.VnS(), 270);
@@ -14635,7 +14582,8 @@ TEST_SVE(sve_fcadd) {
__ Sel(z2.VnD(), p3, z1.VnD(), z30.VnD());
__ Sel(z28.VnD(), p2, z1.VnD(), z30.VnD());
__ Sel(z15.VnD(), p3, z15.VnD(), z0.VnD());
- __ Ext(z16.VnB(), z15.VnB(), z15.VnB(), 8);
+ __ Mov(z16, z15);
+ __ Ext(z16.VnB(), z16.VnB(), z16.VnB(), 8);
__ Sel(z16.VnD(), p2, z16.VnD(), z30.VnD());
__ Fcadd(z12.VnD(), p0.Merging(), z0.VnD(), z2.VnD(), 90);
__ Fcadd(z12.VnD(), p0.Merging(), z12.VnD(), z28.VnD(), 270);
@@ -14701,8 +14649,8 @@ TEST_SVE(sve_fcmla_index) {
// Create a reference result from a vector complex multiply.
__ Dup(z6.VnH(), 0);
- __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 0);
- __ Fcmla(z6.VnH(), p0.Merging(), z0.VnH(), z2.VnH(), 90);
+ __ Fcmla(z6.VnH(), p0.Merging(), z6.VnH(), z0.VnH(), z2.VnH(), 0);
+ __ Fcmla(z6.VnH(), p0.Merging(), z6.VnH(), z0.VnH(), z2.VnH(), 90);
// Repeated, but for wider elements.
__ Fdup(z0.VnS(), 42.0);
@@ -14726,8 +14674,8 @@ TEST_SVE(sve_fcmla_index) {
__ Fcmla(z8.VnS(), z0.VnS(), z3.VnS(), 0, 270);
__ Fneg(z8.VnS(), p0.Merging(), z8.VnS());
__ Dup(z9.VnS(), 0);
- __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 0);
- __ Fcmla(z9.VnS(), p0.Merging(), z0.VnS(), z2.VnS(), 90);
+ __ Fcmla(z9.VnS(), p0.Merging(), z9.VnS(), z0.VnS(), z2.VnS(), 0);
+ __ Fcmla(z9.VnS(), p0.Merging(), z9.VnS(), z0.VnS(), z2.VnS(), 90);
END();
if (CAN_RUN()) {
@@ -14770,8 +14718,8 @@ TEST_SVE(sve_fcmla) {
// ... 7 6 5 4 3 2 1 0 <-- element
// ... | 20+A^2 | 8A | 0 | 0 | 20+A^2 | 8A | 0 | 0 | <-- value
__ Dup(z5.VnH(), 0);
- __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 0);
- __ Fcmla(z5.VnH(), p3.Merging(), z4.VnH(), z3.VnH(), 90);
+ __ Fcmla(z5.VnH(), p3.Merging(), z5.VnH(), z4.VnH(), z3.VnH(), 0);
+ __ Fcmla(z5.VnH(), p3.Merging(), z5.VnH(), z4.VnH(), z3.VnH(), 90);
// Move the odd results to the even result positions.
// ... 7 6 5 4 3 2 1 0 <-- element
@@ -14783,8 +14731,8 @@ TEST_SVE(sve_fcmla) {
// ... 7 6 5 4 3 2 1 0 <-- element
// ... | 0 | 0 | -20-A^2 | -8A | 0 | 0 | -20-A^2 | -8A | <-- value
__ Dup(z6.VnH(), 0);
- __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 180);
- __ Fcmla(z6.VnH(), p2.Merging(), z4.VnH(), z3.VnH(), 270);
+ __ Fcmla(z6.VnH(), p2.Merging(), z6.VnH(), z4.VnH(), z3.VnH(), 180);
+ __ Fcmla(z6.VnH(), p2.Merging(), z6.VnH(), z4.VnH(), z3.VnH(), 270);
// Negate the even results. The results in z6 should now match the results
// computed earlier in z5.
@@ -14807,12 +14755,12 @@ TEST_SVE(sve_fcmla) {
__ Punpklo(p2.VnH(), p2.VnB());
__ Punpklo(p3.VnH(), p3.VnB());
__ Dup(z7.VnS(), 0);
- __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 0);
- __ Fcmla(z7.VnS(), p3.Merging(), z4.VnS(), z3.VnS(), 90);
+ __ Fcmla(z7.VnS(), p3.Merging(), z7.VnS(), z4.VnS(), z3.VnS(), 0);
+ __ Fcmla(z7.VnS(), p3.Merging(), z7.VnS(), z4.VnS(), z3.VnS(), 90);
__ Ext(z7.VnB(), z7.VnB(), z7.VnB(), 8);
__ Dup(z8.VnS(), 0);
- __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 180);
- __ Fcmla(z8.VnS(), p2.Merging(), z4.VnS(), z3.VnS(), 270);
+ __ Fcmla(z8.VnS(), p2.Merging(), z8.VnS(), z4.VnS(), z3.VnS(), 180);
+ __ Fcmla(z8.VnS(), p2.Merging(), z8.VnS(), z4.VnS(), z3.VnS(), 270);
__ Fneg(z8.VnS(), p2.Merging(), z8.VnS());
// Double precision computed for even lanes only.
@@ -14827,11 +14775,11 @@ TEST_SVE(sve_fcmla) {
__ Sel(z4.VnD(), p2, z1.VnD(), z2.VnD());
__ Punpklo(p2.VnH(), p2.VnB());
__ Dup(z9.VnD(), 0);
- __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 0);
- __ Fcmla(z9.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 90);
+ __ Fcmla(z9.VnD(), p2.Merging(), z9.VnD(), z4.VnD(), z3.VnD(), 0);
+ __ Fcmla(z9.VnD(), p2.Merging(), z9.VnD(), z4.VnD(), z3.VnD(), 90);
__ Dup(z10.VnD(), 0);
- __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 180);
- __ Fcmla(z10.VnD(), p2.Merging(), z4.VnD(), z3.VnD(), 270);
+ __ Fcmla(z10.VnD(), p2.Merging(), z10.VnD(), z4.VnD(), z3.VnD(), 180);
+ __ Fcmla(z10.VnD(), p2.Merging(), z10.VnD(), z4.VnD(), z3.VnD(), 270);
__ Fneg(z10.VnD(), p2.Merging(), z10.VnD());
END();
@@ -18706,5 +18654,1331 @@ TEST_SVE(sve_prefetch_offset) {
}
}
+TEST_SVE(sve2_match_nmatch) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+
+ START();
+
+ __ Ptrue(p0.VnB());
+ __ Ptrue(p1.VnH());
+ __ Ptrue(p2.VnS());
+
+ // Vector to search is bytes 0 - 7, repeating every eight bytes.
+ __ Index(z0.VnB(), 0, 1);
+ __ Dup(z0.VnD(), z0.VnD(), 0);
+
+ // Elements to find are (repeated) bytes 0 - 3 in the first segment, 4 - 7
+ // in the second, 8 - 11 in the third, etc.
+ __ Index(z1.VnB(), 0, 1);
+ __ Lsr(z1.VnB(), z1.VnB(), 2);
+
+ __ Match(p3.VnB(), p0.Zeroing(), z0.VnB(), z1.VnB());
+ __ Match(p4.VnB(), p1.Zeroing(), z0.VnB(), z1.VnB());
+ __ Nmatch(p0.VnB(), p0.Zeroing(), z0.VnB(), z1.VnB());
+
+ __ Uunpklo(z0.VnH(), z0.VnB());
+ __ Uunpklo(z1.VnH(), z1.VnB());
+
+ __ Match(p5.VnH(), p1.Zeroing(), z0.VnH(), z1.VnH());
+ __ Match(p6.VnH(), p2.Zeroing(), z0.VnH(), z1.VnH());
+ __ Nmatch(p1.VnH(), p1.Zeroing(), z0.VnH(), z1.VnH());
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+
+ int p3_exp[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
+ ASSERT_EQUAL_SVE(p3_exp, p3.VnB());
+ int p4_exp[] = {0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1};
+ ASSERT_EQUAL_SVE(p4_exp, p4.VnB());
+ int p0_exp[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
+ ASSERT_EQUAL_SVE(p0_exp, p0.VnB());
+
+ int p5_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1};
+ ASSERT_EQUAL_SVE(p5_exp, p5.VnB());
+ int p6_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ ASSERT_EQUAL_SVE(p6_exp, p6.VnB());
+ int p1_exp[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0};
+ ASSERT_EQUAL_SVE(p1_exp, p1.VnB());
+ }
+}
+
+TEST_SVE(sve2_saba_uaba) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+
+ START();
+
+ __ Index(z0.VnB(), 0, 1);
+ __ Dup(z1.VnB(), 0xff);
+ __ Dup(z2.VnB(), 1);
+ __ Uaba(z2.VnB(), z2.VnB(), z0.VnB(), z1.VnB());
+ __ Index(z0.VnB(), 0, -1);
+
+ __ Index(z3.VnH(), 0, 1);
+ __ Index(z4.VnH(), 1, 1);
+ __ Uaba(z3.VnH(), z3.VnH(), z3.VnH(), z4.VnH());
+
+ __ Index(z5.VnS(), 3, 6);
+ __ Index(z6.VnS(), 5, 6);
+ __ Uaba(z5.VnS(), z5.VnS(), z5.VnS(), z6.VnS());
+
+ __ Index(z7.VnD(), 424, 12);
+ __ Index(z8.VnD(), 4242, 12);
+ __ Uaba(z7.VnD(), z7.VnD(), z7.VnD(), z8.VnD());
+
+ __ Index(z9.VnH(), -1, -1);
+ __ Dup(z10.VnB(), 0);
+ __ Saba(z10.VnB(), z10.VnB(), z9.VnB(), z10.VnB());
+ __ Index(z11.VnH(), 0x0101, 1);
+
+ __ Index(z12.VnH(), 0, 1);
+ __ Index(z13.VnH(), 0, -1);
+ __ Saba(z13.VnH(), z13.VnH(), z12.VnH(), z13.VnH());
+
+ __ Index(z14.VnS(), 0, 2);
+ __ Index(z15.VnS(), 0, -2);
+ __ Saba(z15.VnS(), z15.VnS(), z14.VnS(), z15.VnS());
+
+ __ Index(z16.VnD(), 0, 42);
+ __ Index(z17.VnD(), 0, -42);
+ __ Saba(z17.VnD(), z17.VnD(), z16.VnD(), z17.VnD());
+
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_SVE(z0, z2);
+ ASSERT_EQUAL_SVE(z3, z4);
+ ASSERT_EQUAL_SVE(z5, z6);
+ ASSERT_EQUAL_SVE(z7, z8);
+
+ ASSERT_EQUAL_SVE(z10, z11);
+ ASSERT_EQUAL_SVE(z12, z13);
+ ASSERT_EQUAL_SVE(z14, z15);
+ ASSERT_EQUAL_SVE(z16, z17);
+ }
+}
+
+TEST_SVE(sve2_integer_multiply_long_vector) {
+ // The test just check Sqdmull[b|t] and Pmull[b|t], as the way how the element
+ // operating of the other instructions in the group are likewise.
+ int32_t zn_inputs_s[] =
+ {1, -2, 3, -4, 5, -6, 7, -8, INT32_MIN, INT32_MAX, INT32_MAX, INT32_MIN};
+
+ int32_t zm_inputs_s[] =
+ {1, 2, 3, 4, 5, 6, 7, 8, INT32_MAX, INT32_MIN, INT32_MAX, INT32_MIN};
+ int64_t sqdmullb_vec_expected_d[] =
+ {-8, -32, -72, -128, RawbitsToInt64(0x8000000100000000), INT64_MAX};
+
+ uint64_t sqdmullt_vec_expected_d[] =
+ {2, 18, 50, 98, 0x8000000100000000, 0x7ffffffe00000002};
+
+ uint64_t pmullb_vec_expected_d[] = {0x00000001fffffffc,
+ 0x00000003fffffff0,
+ 0x000000020000001c,
+ 0x00000007ffffffc0,
+ 0x3fffffff80000000,
+ 0x4000000000000000};
+
+ uint64_t pmullt_vec_expected_d[] = {0x05,
+ 0x11,
+ 0x15,
+ 0x3fffffff80000000,
+ 0x1555555555555555};
+
+ uint64_t sqdmullb_idx_expected_d[] = {0xfffffffffffffff8,
+ 0xfffffffffffffff0,
+ 0xffffffffffffffb8,
+ 0xffffffffffffffa0,
+ 0x8000000100000000,
+ INT64_MAX};
+
+ uint64_t sqdmullt_idx_expected_d[] =
+ {8, // 2 * zn[11] * zm[8] = 2 * 4 * 1
+ 24, // 2 * zn[9] * zm[8] = 2 * 4 * 3
+ 80, // 2 * zn[7] * zm[4] = 2 * 8 * 5
+ 112, // 2 * zn[5] * zm[4] = 2 * 8 * 7
+ 0x7fffffffffffffff, // 2 * zn[3] * zm[0]
+ 0x8000000100000000}; // 2 * zn[1] * zm[0]
+
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+ START();
+
+ InsrHelper(&masm, z31.VnS(), zn_inputs_s);
+ InsrHelper(&masm, z30.VnS(), zm_inputs_s);
+
+ __ Sqdmullb(z1.VnD(), z31.VnS(), z30.VnS());
+ __ Sqdmullt(z2.VnD(), z31.VnS(), z30.VnS());
+
+ __ Pmullb(z3.VnD(), z31.VnS(), z30.VnS());
+ __ Pmullt(z4.VnD(), z31.VnS(), z30.VnS());
+
+ __ Mov(z7, z30);
+ __ Mov(z8, z31);
+ __ Sqdmullb(z5.VnD(), z8.VnS(), z7.VnS(), 2);
+ __ Sqdmullt(z6.VnD(), z8.VnS(), z7.VnS(), 0);
+
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_SVE(sqdmullb_vec_expected_d, z1.VnD());
+ ASSERT_EQUAL_SVE(sqdmullt_vec_expected_d, z2.VnD());
+ ASSERT_EQUAL_SVE(pmullb_vec_expected_d, z3.VnD());
+ ASSERT_EQUAL_SVE(pmullt_vec_expected_d, z4.VnD());
+ ASSERT_EQUAL_SVE(sqdmullb_idx_expected_d, z5.VnD());
+ ASSERT_EQUAL_SVE(sqdmullt_idx_expected_d, z6.VnD());
+ }
+}
+
+TEST_SVE(sve2_integer_multiply_add_long_vector) {
+ int32_t zn_inputs_s[] =
+ {1, -2, 3, -4, 5, -6, 7, -8, INT32_MIN, INT32_MAX, INT32_MAX, INT32_MIN};
+
+ int32_t zm_inputs_s[] =
+ {1, 2, 3, 4, 5, 6, 7, 8, INT32_MAX, INT32_MIN, INT32_MAX, INT32_MIN};
+
+ int64_t sqdmlalb_vec_expected_d[] =
+ {-3, -28, -69, -126, RawbitsToInt64(0x8000000100000001), INT64_MAX};
+
+ int64_t sqdmlalt_vec_expected_d[] = {-3,
+ 14,
+ 47,
+ 96,
+ RawbitsToInt64(0x80000000ffffffff),
+ static_cast<int64_t>(
+ 0x7ffffffe00000002)};
+
+ int64_t sqdmlalb_idx_expected_d[] =
+ {-11, // za.d[5] + 2 * zn.s[10] * zm.s[8] = 5 + 2 * -2 * 4
+ -28, // za.d[4] + 2 * zn.s[8] * zm.s[8] = 4 + 2 * -4 * 4
+ -93, // za.d[3] + 2 * zn.s[6] * zm.s[4] = 3 + 2 * -6 * 8
+ -126, // za.d[2] + 2 * zn.s[4] * zm.s[4] = 2 + 2 * -8 * 8
+ RawbitsToInt64(0x8000000100000001),
+ INT64_MAX};
+
+ int64_t sqdmlalt_idx_expected_d[] =
+ {1, // za.d[5] + 2 * zn.s[11] * zm.s[9] = -5 + 2 * 1 * 3
+ 14, // za.d[4] + 2 * zn.s[9] * zm.s[9] = -4 + 2 * 3 * 3
+ 67, // za.d[3] + 2 * zn.s[7] * zm.s[5] = -3 + 2 * 5 * 7
+ 96, // za.d[2] + 2 * zn.s[5] * zm.s[5] = -2 + 2 * 7 * 7
+ RawbitsToInt64(0x80000000ffffffff),
+ static_cast<int64_t>(0x7ffffffe00000002)};
+
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+ START();
+
+ InsrHelper(&masm, z0.VnS(), zn_inputs_s);
+ InsrHelper(&masm, z1.VnS(), zm_inputs_s);
+ __ Index(z2.VnD(), 0, 1);
+ __ Index(z3.VnD(), 0, -1);
+
+ __ Mov(z31, z2);
+ __ Sqdmlalb(z31.VnD(), z31.VnD(), z0.VnS(), z1.VnS());
+ __ Mov(z30, z3);
+ __ Sqdmlalt(z30.VnD(), z30.VnD(), z0.VnS(), z1.VnS());
+ __ Mov(z29, z31);
+ __ Sqdmlslb(z29.VnD(), z29.VnD(), z0.VnS(), z1.VnS());
+ __ Mov(z28, z30);
+ __ Sqdmlslt(z28.VnD(), z28.VnD(), z0.VnS(), z1.VnS());
+
+ __ Sqdmlalb(z27.VnD(), z2.VnD(), z0.VnS(), z1.VnS());
+ __ Sqdmlalt(z26.VnD(), z3.VnD(), z0.VnS(), z1.VnS());
+ __ Sqdmlslb(z25.VnD(), z27.VnD(), z0.VnS(), z1.VnS());
+ __ Sqdmlslt(z24.VnD(), z26.VnD(), z0.VnS(), z1.VnS());
+
+ __ Mov(z23, z2);
+ __ Sqdmlalb(z23.VnD(), z23.VnD(), z0.VnS(), z1.VnS(), 0);
+ __ Mov(z22, z3);
+ __ Sqdmlalt(z22.VnD(), z22.VnD(), z0.VnS(), z1.VnS(), 1);
+ __ Mov(z21, z23);
+ __ Sqdmlslb(z21.VnD(), z21.VnD(), z0.VnS(), z1.VnS(), 0);
+ __ Mov(z20, z22);
+ __ Sqdmlslt(z20.VnD(), z20.VnD(), z0.VnS(), z1.VnS(), 1);
+
+
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_SVE(sqdmlalb_vec_expected_d, z31.VnD());
+ ASSERT_EQUAL_SVE(sqdmlalt_vec_expected_d, z30.VnD());
+ ASSERT_EQUAL_SVE(z2, z29);
+ ASSERT_EQUAL_SVE(z3, z28);
+
+ ASSERT_EQUAL_SVE(z31, z27);
+ ASSERT_EQUAL_SVE(z30, z26);
+ ASSERT_EQUAL_SVE(z29, z25);
+ ASSERT_EQUAL_SVE(z28, z24);
+
+ ASSERT_EQUAL_SVE(sqdmlalb_idx_expected_d, z23.VnD());
+ ASSERT_EQUAL_SVE(sqdmlalt_idx_expected_d, z22.VnD());
+ ASSERT_EQUAL_SVE(z2, z21);
+ ASSERT_EQUAL_SVE(z3, z20);
+ }
+}
+
+// Test SVE2 non-temporal gather loads (Ldnt1*, vector base plus scalar
+// offset). Each result is checked against an ordinary Ld1* gather reading
+// the same addresses (scalar base plus vector index), so both addressing
+// forms must load identical values.
+TEST_SVE(sve2_ldnt1) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+  START();
+
+  // Fill a buffer with known bytes to load from.
+  int data_size = kZRegMaxSizeInBytes * 4;
+  uint8_t* data = new uint8_t[data_size];
+  for (int i = 0; i < data_size; i++) {
+    data[i] = i & 0xff;
+  }
+
+  // Set the base half-way through the buffer so we can use negative indices.
+  __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
+  __ Index(z30.VnD(), x0, 1);
+  __ Ptrue(p0.VnB());
+  // Successively halve the number of active lanes for the wider accesses.
+  __ Punpklo(p1.VnH(), p0.VnB());
+  __ Punpklo(p2.VnH(), p1.VnB());
+  __ Punpklo(p3.VnH(), p2.VnB());
+  __ Punpklo(p4.VnH(), p3.VnB());
+
+  __ Mov(x1, 1);
+  __ Ldnt1b(z0.VnD(), p1.Zeroing(), SVEMemOperand(z30.VnD(), x1));
+  __ Ld1b(z1.VnD(), p1.Zeroing(), SVEMemOperand(x1, z30.VnD()));
+
+  __ Mov(x1, -4);
+  __ Ldnt1h(z2.VnD(), p2.Zeroing(), SVEMemOperand(z30.VnD(), x1));
+  __ Ld1h(z3.VnD(), p2.Zeroing(), SVEMemOperand(x1, z30.VnD()));
+
+  __ Mov(x1, 16);
+  __ Ldnt1w(z4.VnD(), p3.Zeroing(), SVEMemOperand(z30.VnD(), x1));
+  __ Ld1w(z5.VnD(), p3.Zeroing(), SVEMemOperand(x1, z30.VnD()));
+
+  __ Mov(x1, -16);
+  __ Ldnt1d(z6.VnD(), p4.Zeroing(), SVEMemOperand(z30.VnD(), x1));
+  __ Ld1d(z7.VnD(), p4.Zeroing(), SVEMemOperand(x1, z30.VnD()));
+
+  // Sign-extending variants.
+  __ Mov(x1, 1);
+  __ Ldnt1sb(z8.VnD(), p0.Zeroing(), SVEMemOperand(z30.VnD(), x1));
+  __ Ld1sb(z9.VnD(), p0.Zeroing(), SVEMemOperand(x1, z30.VnD()));
+
+  __ Mov(x1, -4);
+  __ Ldnt1sh(z10.VnD(), p2.Zeroing(), SVEMemOperand(z30.VnD(), x1));
+  __ Ld1sh(z11.VnD(), p2.Zeroing(), SVEMemOperand(x1, z30.VnD()));
+
+  __ Mov(x1, 16);
+  __ Ldnt1sw(z12.VnD(), p3.Zeroing(), SVEMemOperand(z30.VnD(), x1));
+  __ Ld1sw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x1, z30.VnD()));
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(z0, z1);
+    ASSERT_EQUAL_SVE(z2, z3);
+    ASSERT_EQUAL_SVE(z4, z5);
+    ASSERT_EQUAL_SVE(z6, z7);
+    ASSERT_EQUAL_SVE(z8, z9);
+    ASSERT_EQUAL_SVE(z10, z11);
+    ASSERT_EQUAL_SVE(z12, z13);
+  }
+
+  // Release the buffer; it was previously leaked.
+  delete[] data;
+}
+
+// Test SVE2 non-temporal scatter stores (Stnt1*, vector base plus scalar
+// offset). For each element size: fill a region with a background pattern
+// (0xaa), scatter 0x55 over its start with Stnt1*, load the region back,
+// and compare against a reference built with Sel under a predicate that
+// covers exactly the bytes the scatter wrote.
+TEST_SVE(sve2_stnt1) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+  START();
+
+  // The buffer need not be initialised: every byte compared is written by
+  // the St1b background store before it is read back.
+  int data_size = kZRegMaxSizeInBytes * 4;
+  uint8_t* data = new uint8_t[data_size];
+
+  // Set the base half-way through the buffer so we can use negative indices.
+  __ Mov(x0, reinterpret_cast<uintptr_t>(&data[data_size / 2]));
+  __ Ptrue(p0.VnB());
+  __ Punpklo(p1.VnH(), p0.VnB());
+  __ Punpklo(p2.VnH(), p1.VnB());
+  __ Punpklo(p3.VnH(), p2.VnB());
+  __ Punpklo(p4.VnH(), p3.VnB());
+  __ Dup(z0.VnB(), 0xaa);
+  __ Dup(z1.VnB(), 0x55);
+  __ Rdvl(x1, 1);
+  __ Mov(x3, 0);
+
+  // Put store addresses into z30, and a small offset in x4.
+  __ Index(z30.VnD(), x0, 1);
+  __ Mov(x4, 2);
+
+  // Store an entire vector of 0xaa to the buffer, then a smaller scatter store
+  // of 0x55 using Stnt1b.
+  __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4));
+  __ Stnt1b(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4));
+
+  // Load the entire vector back from the buffer.
+  __ Ld1b(z2.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4));
+
+  // Construct a predicate that reflects the number of bytes stored by Stnt1b,
+  // based on the current VL, and use Sel to obtain a reference vector for
+  // comparison.
+  __ Lsr(x2, x1, 3);
+  __ Whilelo(p5.VnB(), x3, x2);
+  __ Sel(z3.VnB(), p5.Merging(), z1.VnB(), z0.VnB());
+
+  // Repeat for larger element sizes.
+  __ Mov(x4, -4);
+  __ Index(z30.VnD(), x0, 2);
+  __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4));
+  __ Stnt1h(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4));
+  __ Ld1b(z4.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4));
+  __ Lsr(x2, x1, 2);
+  __ Whilelo(p5.VnB(), x3, x2);
+  __ Sel(z5.VnB(), p5.Merging(), z1.VnB(), z0.VnB());
+
+  __ Mov(x4, 16);
+  __ Index(z30.VnD(), x0, 4);
+  __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4));
+  __ Stnt1w(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4));
+  __ Ld1b(z6.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4));
+  __ Lsr(x2, x1, 1);
+  __ Whilelo(p5.VnB(), x3, x2);
+  __ Sel(z7.VnB(), p5.Merging(), z1.VnB(), z0.VnB());
+
+  __ Mov(x4, -16);
+  __ Index(z30.VnD(), x0, 8);
+  __ St1b(z0.VnB(), p0, SVEMemOperand(x0, x4));
+  __ Stnt1d(z1.VnD(), p0, SVEMemOperand(z30.VnD(), x4));
+  __ Ld1b(z8.VnB(), p0.Zeroing(), SVEMemOperand(x0, x4));
+  __ Whilelo(p5.VnB(), x3, x1);
+  __ Sel(z9.VnB(), p5.Merging(), z1.VnB(), z0.VnB());
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(z2, z3);
+    ASSERT_EQUAL_SVE(z4, z5);
+    ASSERT_EQUAL_SVE(z6, z7);
+    ASSERT_EQUAL_SVE(z8, z9);
+  }
+
+  // Release the buffer; it was previously leaked.
+  delete[] data;
+}
+
+// Test the SVE2 decrementing while-constructions: unsigned (Whilehi/Whilehs)
+// and signed (Whilegt/Whilege) predicate generation from a pair of scalars.
+// The expected lane patterns below were computed by hand.
+TEST_SVE(sve2_while_simple) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+
+  START();
+  __ Mov(x0, 1);
+  __ Mov(x1, 0);
+  __ Mov(x2, 3);
+
+  // Unsigned comparisons.
+  __ Whilehi(p0.VnB(), x0, x1);
+  __ Whilehs(p1.VnB(), x0, x1);
+  __ Whilehi(p2.VnB(), x2, x1);
+  __ Whilehs(p3.VnB(), x2, x1);
+  __ Whilehi(p4.VnB(), x2, x0);
+  __ Whilehs(p5.VnB(), x2, x0);
+
+  // Signed comparisons.
+  __ Whilegt(p6.VnB(), x0, x1);
+  __ Whilege(p7.VnB(), x0, x1);
+  __ Whilegt(p8.VnB(), x2, x1);
+  __ Whilege(p9.VnB(), x2, x1);
+  __ Whilegt(p10.VnB(), x2, x0);
+  __ Whilege(p11.VnB(), x2, x0);
+
+  // 32-bit (W register) forms: these values are negative when interpreted
+  // as signed 32-bit integers.
+  __ Mov(x4, 0x80000000);
+  __ Mov(x5, 0x80000001);
+  __ Whilege(p12.VnB(), w5, w4);
+  __ Whilegt(p13.VnB(), w5, w4);
+
+  // 64-bit forms with values that are negative when interpreted as signed.
+  __ Mov(x6, 0x8000000000000000);
+  __ Mov(x7, 0x8000000000000001);
+  __ Whilege(p14.VnB(), x7, x6);
+  __ Whilegt(p15.VnB(), x7, x6);
+
+  // Reverse each predicate so that lane order matches the expected arrays
+  // written below.
+  for (int i = 0; i < 16; i++) {
+    __ Rev(PRegister(i).VnB(), PRegister(i).VnB());
+  }
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    int p0_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    int p1_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    int p2_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
+    int p3_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    int p4_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
+    int p5_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
+    int p6_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    int p7_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
+    int p8_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
+    int p9_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
+    int p10_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1};
+    int p11_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1};
+    int p12_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    int p13_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    int p14_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    int p15_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
+    ASSERT_EQUAL_SVE(p0_exp, p0.VnB());
+    ASSERT_EQUAL_SVE(p1_exp, p1.VnB());
+    ASSERT_EQUAL_SVE(p2_exp, p2.VnB());
+    ASSERT_EQUAL_SVE(p3_exp, p3.VnB());
+    ASSERT_EQUAL_SVE(p4_exp, p4.VnB());
+    ASSERT_EQUAL_SVE(p5_exp, p5.VnB());
+    ASSERT_EQUAL_SVE(p6_exp, p6.VnB());
+    ASSERT_EQUAL_SVE(p7_exp, p7.VnB());
+    ASSERT_EQUAL_SVE(p8_exp, p8.VnB());
+    ASSERT_EQUAL_SVE(p9_exp, p9.VnB());
+    ASSERT_EQUAL_SVE(p10_exp, p10.VnB());
+    ASSERT_EQUAL_SVE(p11_exp, p11.VnB());
+    ASSERT_EQUAL_SVE(p12_exp, p12.VnB());
+    ASSERT_EQUAL_SVE(p13_exp, p13.VnB());
+    ASSERT_EQUAL_SVE(p14_exp, p14.VnB());
+    ASSERT_EQUAL_SVE(p15_exp, p15.VnB());
+  }
+}
+
+// Test SVE2 Whilerw/Whilewr, which generate conflict-detection predicates
+// from a pair of addresses, for B- and H-sized elements. The address pairs
+// are chosen so the expected patterns (all-true, one-lane, alternating for
+// halfwords) can be stated by hand.
+TEST_SVE(sve2_whilerw_whilewr_simple) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+
+  START();
+  __ Mov(x0, 0);
+  __ Mov(x1, 1);
+  __ Mov(x2, 3);
+
+  // Read-after-write checks on byte elements.
+  __ Whilerw(p0.VnB(), x0, x0);
+  __ Whilerw(p1.VnB(), x0, x1);
+  __ Whilerw(p2.VnB(), x1, x0);
+
+  // Write-after-read checks on byte elements.
+  __ Whilewr(p3.VnB(), x0, x0);
+  __ Whilewr(p4.VnB(), x0, x1);
+  __ Whilewr(p5.VnB(), x1, x0);
+
+  // Write-after-read checks on halfword elements.
+  __ Whilewr(p6.VnH(), x1, x1);
+  __ Whilewr(p7.VnH(), x1, x2);
+  __ Whilewr(p8.VnH(), x2, x1);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    int p0_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    ASSERT_EQUAL_SVE(p0_exp, p0.VnB());
+    int p1_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    ASSERT_EQUAL_SVE(p1_exp, p1.VnB());
+    int p2_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    ASSERT_EQUAL_SVE(p2_exp, p2.VnB());
+    int p3_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    ASSERT_EQUAL_SVE(p3_exp, p3.VnB());
+    int p4_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    ASSERT_EQUAL_SVE(p4_exp, p4.VnB());
+    int p5_exp[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    ASSERT_EQUAL_SVE(p5_exp, p5.VnB());
+    // H-sized lanes are shown as byte pairs, hence the alternating pattern.
+    int p6_exp[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+    ASSERT_EQUAL_SVE(p6_exp, p6.VnB());
+    int p7_exp[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+    ASSERT_EQUAL_SVE(p7_exp, p7.VnB());
+    int p8_exp[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+    ASSERT_EQUAL_SVE(p8_exp, p8.VnB());
+  }
+}
+
+// Test SVE2 Sqrdcmlah (saturating rounding doubling complex multiply-add)
+// for all four rotations (0/90/180/270), for both destructive and
+// non-destructive (movprfx'd) forms, and for the indexed variants.
+TEST_SVE(sve2_sqrdcmlah) {
+  int32_t zn_inputs[] = {-1, -2, -3, -4, 1, 2, 3, 4};
+  int32_t zm_inputs[] = {-1, -2, 3, 4, 1, 2, -3, -4};
+  int32_t za_inputs[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  // Expected results per rotation, computed by hand for the shifted inputs.
+  int32_t zd_000_expected[] =
+      {1025, 2050, -6141, -8188, 1029, 2054, -6137, -8184};
+  int32_t zd_090_expected[] =
+      {1025, -510, -6141, 4612, 1029, -506, -6137, 4616};
+  int32_t zd_180_expected[] =
+      {-1023, -2046, 6147, 8196, -1019, -2042, 6151, 8200};
+  int32_t zd_270_expected[] =
+      {-1023, 514, 6147, -4604, -1019, 518, 6151, -4600};
+  // Expected results for the indexed forms exercised below.
+  int32_t zd_0_270_expected[] =
+      {2049, -1534, 6147, -4604, 2053, -1530, 6151, -4600};
+  int32_t zd_3_090_expected[] =
+      {1025, -510, 3075, -1532, 1029, -506, 3079, -1528};
+
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+  START();
+
+  InsrHelper(&masm, z0.VnS(), zn_inputs);
+  InsrHelper(&masm, z1.VnS(), zm_inputs);
+  InsrHelper(&masm, z31.VnS(), za_inputs);
+
+  // When the value in operands is small, shift left a random value so that it
+  // can affect the result in destination.
+  int shift = 20;
+  __ Lsl(z0.VnS(), z0.VnS(), shift);
+  __ Lsl(z1.VnS(), z1.VnS(), shift);
+
+  // Destructive forms, one per rotation.
+  __ Mov(z10, z31);
+  __ Sqrdcmlah(z10.VnS(), z10.VnS(), z0.VnS(), z1.VnS(), 0);
+
+  __ Mov(z11, z31);
+  __ Sqrdcmlah(z11.VnS(), z11.VnS(), z0.VnS(), z1.VnS(), 90);
+
+  __ Mov(z12, z31);
+  __ Sqrdcmlah(z12.VnS(), z12.VnS(), z0.VnS(), z1.VnS(), 180);
+
+  __ Mov(z13, z31);
+  __ Sqrdcmlah(z13.VnS(), z13.VnS(), z0.VnS(), z1.VnS(), 270);
+
+  // Non-destructive forms; must match the destructive results above.
+  __ Sqrdcmlah(z14.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 0);
+  __ Sqrdcmlah(z15.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 90);
+  __ Sqrdcmlah(z16.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 180);
+  __ Sqrdcmlah(z17.VnS(), z31.VnS(), z0.VnS(), z1.VnS(), 270);
+
+  // Indexed forms (index, rotation).
+  __ Mov(z18, z31);
+  __ Sqrdcmlah(z18.VnS(), z18.VnS(), z0.VnS(), z1.VnS(), 0, 270);
+
+  __ Mov(z19, z31);
+  __ Sqrdcmlah(z19.VnS(), z19.VnS(), z0.VnS(), z1.VnS(), 1, 90);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_SVE(zd_000_expected, z10.VnS());
+    ASSERT_EQUAL_SVE(zd_090_expected, z11.VnS());
+    ASSERT_EQUAL_SVE(zd_180_expected, z12.VnS());
+    ASSERT_EQUAL_SVE(zd_270_expected, z13.VnS());
+
+    ASSERT_EQUAL_SVE(z14, z10);
+    ASSERT_EQUAL_SVE(z15, z11);
+    ASSERT_EQUAL_SVE(z16, z12);
+    ASSERT_EQUAL_SVE(z17, z13);
+
+    ASSERT_EQUAL_SVE(zd_0_270_expected, z18.VnS());
+    ASSERT_EQUAL_SVE(zd_3_090_expected, z19.VnS());
+  }
+}
+
+// Test SVE2 Sqrdmlah (saturating rounding doubling multiply-accumulate,
+// returning high half) for H-, S- and D-sized elements. The input tables
+// deliberately include values near the signed limits (e.g. 0x8000/0x7fff)
+// to exercise the saturation paths; expected values were computed by hand.
+TEST_SVE(sve2_sqrdmlah) {
+  uint16_t zn_inputs_h[] = {0x7ffe, 0x7ffd, 0x7ffd, 0x7ffd, 0x8000,
+                            0x7fff, 0x7ffe, 0x7ffe, 0x8001, 0x8000,
+                            0x7ffd, 0x7ffd, 0x7ffd, 0x5555, 0x5555,
+                            0x5555, 0x8000, 0x8000, 0xaaaa, 0x8001};
+
+  uint16_t zm_inputs_h[] = {0x7ffd, 0x7fff, 0x7ffe, 0x7ffd, 0x8001,
+                            0x7fff, 0x7fff, 0x7ffe, 0x8000, 0x8000,
+                            0xaaaa, 0x0001, 0x0001, 0xaaaa, 0xaaaa,
+                            0xcccc, 0x8000, 0x8000, 0x8000, 0x8001};
+
+  uint16_t za_inputs_h[] = {0x1010, 0x1010, 0x1010, 0x1010, 0x1010,
+                            0x1010, 0x1010, 0x1010, 0x8000, 0x8011,
+                            0x8006, 0xff7d, 0xfeff, 0xaabc, 0xaabb,
+                            0x9c72, 0x8000, 0x0000, 0x8000, 0xffff};
+
+  uint16_t zd_expected_h[] = {0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
+                              0x7fff, 0x7fff, 0x7fff, 0xffff, 0x0011,
+                              0x8000, 0xff7e, 0xff00, 0x8000, 0x8000,
+                              0x8000, 0x0000, 0x7fff, 0xd556, 0x7ffd};
+
+  uint32_t zn_inputs_s[] = {0x04000000,
+                            0x80000000,
+                            0x04000000,
+                            0x80000000,
+                            0x80000000,
+                            0x80000001,
+                            0x7fffffff,
+                            0x80000000,
+                            0x7ffffffe,
+                            0x7ffffffd,
+                            0x7ffffffd,
+                            0x7ffffffd};
+
+  uint32_t zm_inputs_s[] = {0x00000020,
+                            0x80000000,
+                            0x00000010,
+                            0x80000000,
+                            0x7fffffff,
+                            0x80000000,
+                            0x80000000,
+                            0x80000001,
+                            0x7ffffffd,
+                            0x7fffffff,
+                            0x7ffffffe,
+                            0x7ffffffd};
+
+  uint32_t za_inputs_s[] = {0x00000000,
+                            0x00000000,
+                            0x00000020,
+                            0x00108000,
+                            0x00000000,
+                            0x00000001,
+                            0x00000000,
+                            0x00000001,
+                            0x10101010,
+                            0x10101010,
+                            0x10101010,
+                            0x10101010};
+
+  uint32_t zd_expected_s[] = {0x00000001,
+                              0x7fffffff,
+                              0x00000021,
+                              0x7fffffff,
+                              0x80000001,
+                              0x7fffffff,
+                              0x80000001,
+                              0x7fffffff,
+                              0x7fffffff,
+                              0x7fffffff,
+                              0x7fffffff,
+                              0x7fffffff};
+
+  uint64_t zn_inputs_d[] = {0x0400000000000000, 0x8000000000000000,
+                            0x0400000000000000, 0x8000000000000000,
+                            0x8000000000000000, 0x8000000000000001,
+                            0x7fffffffffffffff, 0x8000000000000000,
+                            0x7ffffffffffffffe, 0x7ffffffffffffffd,
+                            0x7ffffffffffffffd, 0x7ffffffffffffffd,
+                            0xf1299accc9186169, 0xd529d2675ee9da21,
+                            0x1a10b5d60b92dcf9, 0xfb1d358e0e6455b1,
+                            0x8eb7721078bdc589, 0x4171509750ded141,
+                            0x8eb7721078bdc589, 0x4171509750ded141};
+
+  uint64_t zm_inputs_d[] = {0x0000000000000020, 0x8000000000000000,
+                            0x0000000000000010, 0x8000000000000000,
+                            0x7fffffffffffffff, 0x8000000000000000,
+                            0x8000000000000000, 0x8000000000000001,
+                            0x7ffffffffffffffd, 0x7fffffffffffffff,
+                            0x7ffffffffffffffe, 0x7ffffffffffffffd,
+                            0x30b940efe73f180e, 0x3bc1ff1e52a99b66,
+                            0x40de5c9793535a5e, 0x24752faf47bdddb6,
+                            0x162663016b07e5ae, 0x1de34b56f3d22006,
+                            0x8eb7721078bdc589, 0x4171509750ded141};
+
+  uint64_t za_inputs_d[] = {0x0000000000000000, 0x0000000000000000,
+                            0x0000000000000020, 0x0010108000000000,
+                            0x0000000000000000, 0x0000000000000001,
+                            0x0000000000000000, 0x0000000000000001,
+                            0x1010101010101010, 0x1010101010101010,
+                            0x1010101010101010, 0x1010101010101010,
+                            0xb18253371b2c2c77, 0xa70de31e6645eaef,
+                            0xda817198c0318487, 0x9fd9e6b8e04b42ff,
+                            0xced1f6b7119ab197, 0x01ae051a85509b0f,
+                            0x01a211e9352f7927, 0x7667b70a5b13749f};
+
+  uint64_t zd_expected_d[] = {0x0000000000000001, 0x7fffffffffffffff,
+                              0x0000000000000021, 0x7fffffffffffffff,
+                              0x8000000000000001, 0x7fffffffffffffff,
+                              0x8000000000000001, 0x7fffffffffffffff,
+                              0x7fffffffffffffff, 0x7fffffffffffffff,
+                              0x7fffffffffffffff, 0x7fffffffffffffff,
+                              0xabdc73dea0d72a35, 0x930e3dc877301966,
+                              0xe7b7145a059f8a9f, 0x9e75a4a9d10cf8af,
+                              0xbb378528642d2581, 0x10f5e6d693ffddf3,
+                              0x65e455a46adc091c, 0x7fffffffffffffff};
+
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+  START();
+
+  // Halfword-element form.
+  InsrHelper(&masm, z0.VnH(), zn_inputs_h);
+  InsrHelper(&masm, z1.VnH(), zm_inputs_h);
+  InsrHelper(&masm, z2.VnH(), za_inputs_h);
+
+  __ Sqrdmlah(z2.VnH(), z2.VnH(), z0.VnH(), z1.VnH());
+
+  // Word-element form.
+  InsrHelper(&masm, z3.VnS(), zn_inputs_s);
+  InsrHelper(&masm, z4.VnS(), zm_inputs_s);
+  InsrHelper(&masm, z5.VnS(), za_inputs_s);
+
+  __ Sqrdmlah(z5.VnS(), z5.VnS(), z3.VnS(), z4.VnS());
+
+  // Doubleword-element form.
+  InsrHelper(&masm, z6.VnD(), zn_inputs_d);
+  InsrHelper(&masm, z7.VnD(), zm_inputs_d);
+  InsrHelper(&masm, z8.VnD(), za_inputs_d);
+
+  __ Sqrdmlah(z8.VnD(), z8.VnD(), z6.VnD(), z7.VnD());
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(zd_expected_h, z2.VnH());
+    ASSERT_EQUAL_SVE(zd_expected_s, z5.VnS());
+    ASSERT_EQUAL_SVE(zd_expected_d, z8.VnD());
+  }
+}
+
+// Test SVE2 Cmla (complex integer multiply-add) for all four rotations.
+// Each destructive form is checked against hand-computed expected values,
+// and each non-destructive form is checked against the destructive result
+// (the original accumulator values are preserved in z26-z29 for this).
+TEST_SVE(sve2_cmla) {
+  int32_t zn_inputs_s[] = {-2, -4, -6, -8, 2, 4, 6, 8};
+  int32_t zm_inputs_s[] = {-2, -4, -6, -8, 2, 4, 6, 8};
+  int32_t zda_inputs_s[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int32_t zd_000_expected[] = {9, 18, 51, 68, 13, 22, 55, 72};
+  int32_t zd_090_expected[] = {9, -2, 51, -32, 13, 2, 55, -28};
+  int32_t zd_180_expected[] = {-7, -14, -45, -60, -3, -10, -41, -56};
+  int32_t zd_270_expected[] = {-7, 6, -45, 40, -3, 10, -41, 44};
+
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+  START();
+
+  InsrHelper(&masm, z31.VnS(), zn_inputs_s);
+  InsrHelper(&masm, z30.VnS(), zm_inputs_s);
+
+  // Destructive forms, keeping a copy of the accumulator for reuse below.
+  InsrHelper(&masm, z0.VnS(), zda_inputs_s);
+  __ Mov(z29, z0);
+  __ Cmla(z0.VnS(), z0.VnS(), z31.VnS(), z30.VnS(), 0);
+
+  InsrHelper(&masm, z1.VnS(), zda_inputs_s);
+  __ Mov(z28, z1);
+  __ Cmla(z1.VnS(), z1.VnS(), z31.VnS(), z30.VnS(), 90);
+
+  InsrHelper(&masm, z2.VnS(), zda_inputs_s);
+  __ Mov(z27, z2);
+  __ Cmla(z2.VnS(), z2.VnS(), z31.VnS(), z30.VnS(), 180);
+
+  InsrHelper(&masm, z3.VnS(), zda_inputs_s);
+  __ Mov(z26, z3);
+  __ Cmla(z3.VnS(), z3.VnS(), z31.VnS(), z30.VnS(), 270);
+
+  // Non-destructive forms using the saved accumulators.
+  __ Cmla(z4.VnS(), z29.VnS(), z31.VnS(), z30.VnS(), 0);
+  __ Cmla(z5.VnS(), z28.VnS(), z31.VnS(), z30.VnS(), 90);
+  __ Cmla(z6.VnS(), z27.VnS(), z31.VnS(), z30.VnS(), 180);
+  __ Cmla(z7.VnS(), z26.VnS(), z31.VnS(), z30.VnS(), 270);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_SVE(zd_000_expected, z0.VnS());
+    ASSERT_EQUAL_SVE(zd_090_expected, z1.VnS());
+    ASSERT_EQUAL_SVE(zd_180_expected, z2.VnS());
+    ASSERT_EQUAL_SVE(zd_270_expected, z3.VnS());
+
+    ASSERT_EQUAL_SVE(z4, z0);
+    ASSERT_EQUAL_SVE(z5, z1);
+    ASSERT_EQUAL_SVE(z6, z2);
+    ASSERT_EQUAL_SVE(z7, z3);
+  }
+}
+
+// Test SVE2 Sqdmlalbt/Sqdmlslbt (saturating doubling multiply add/subtract
+// long, bottom x top). The source words are interleaved with zeroes so the
+// bottom elements of zn pair with the top elements of zm; the input tables
+// include INT32_MIN/INT32_MAX products to exercise saturation.
+TEST_SVE(sve2_integer_saturating_multiply_add_long) {
+  int32_t zn_bottom_inputs[] =
+      {-2, -4, -6, -8, INT32_MAX, INT32_MIN, INT32_MIN};
+
+  int32_t zm_top_inputs[] = {1, 3, 5, 7, INT32_MAX, INT32_MAX, INT32_MIN};
+
+  int64_t sqdmlalbt_expected[] = {2,
+                                  -19,
+                                  -56,
+                                  -109,
+                                  static_cast<int64_t>(0x7ffffffe00000004),
+                                  RawbitsToInt64(0x8000000100000001),
+                                  INT64_MAX};
+
+  int64_t sqdmlslbt_expected[] = {-2,
+                                  19,
+                                  56,
+                                  109,
+                                  RawbitsToInt64(0x80000001fffffffc),
+                                  static_cast<int64_t>(0x7ffffffeffffffff),
+                                  RawbitsToInt64(0x8000000000000001)};
+
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+  START();
+
+  InsrHelper(&masm, z31.VnS(), zn_bottom_inputs);
+  InsrHelper(&masm, z30.VnS(), zm_top_inputs);
+
+  // Interleave the inputs with zero so zn holds values in its bottom (even)
+  // S lanes and zm holds values in its top (odd) S lanes.
+  __ Dup(z29.VnD(), 0);
+  __ Zip1(z31.VnS(), z31.VnS(), z29.VnS());
+  __ Zip1(z30.VnS(), z29.VnS(), z30.VnS());
+
+  // Initialise inputs for za.
+  __ Index(z1.VnD(), 0, 1);
+  __ Index(z2.VnD(), 0, -1);
+
+  __ Sqdmlalbt(z1.VnD(), z1.VnD(), z31.VnS(), z30.VnS());
+  __ Sqdmlslbt(z2.VnD(), z2.VnD(), z31.VnS(), z30.VnS());
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_SVE(sqdmlalbt_expected, z1.VnD());
+    ASSERT_EQUAL_SVE(sqdmlslbt_expected, z2.VnD());
+  }
+}
+
+// Test SVE2 widening half-to-single FP multiply-add/subtract long
+// (Fmlalb/Fmlalt/Fmlslb/Fmlslt): half-precision products accumulated into
+// single-precision lanes. Inputs include infinities; destructive and
+// non-destructive forms must agree.
+TEST_SVE(sve2_floating_point_multiply_add_long_vector) {
+  uint16_t zn_inputs[] = {Float16ToRawbits(Float16(1000)),
+                          Float16ToRawbits(Float16(2000)),
+                          Float16ToRawbits(Float16(0.5)),
+                          Float16ToRawbits(Float16(-0.5)),
+                          Float16ToRawbits(Float16(14)),
+                          Float16ToRawbits(Float16(-14)),
+                          Float16ToRawbits(kFP16PositiveInfinity),
+                          Float16ToRawbits(kFP16NegativeInfinity)};
+
+  uint16_t zm_inputs[] = {Float16ToRawbits(Float16(10)),
+                          Float16ToRawbits(Float16(-10)),
+                          Float16ToRawbits(Float16(10)),
+                          Float16ToRawbits(Float16(-10)),
+                          Float16ToRawbits(Float16(10)),
+                          Float16ToRawbits(Float16(-10)),
+                          Float16ToRawbits(Float16(10)),
+                          Float16ToRawbits(Float16(-10))};
+
+  uint32_t za_inputs[] = {FloatToRawbits(1.0f),
+                          FloatToRawbits(-1.0f),
+                          FloatToRawbits(1.0f),
+                          FloatToRawbits(-1.0f)};
+
+  uint32_t fmlalb_zd_expected[] = {0xc69c3e00,  // -19999
+                                   0x40800000,  // 4
+                                   0x430d0000,  // 141
+                                   FloatToRawbits(kFP32PositiveInfinity)};
+
+  uint32_t fmlalt_zd_expected[] = {0x461c4400,  // 10001
+                                   0x40800000,  // 4
+                                   0x430d0000,  // 141
+                                   FloatToRawbits(kFP32PositiveInfinity)};
+
+  uint32_t fmlslb_zd_expected[] = {0x469c4200,  // 20001
+                                   0xc0c00000,  // -6
+                                   0xc30b0000,  // -139
+                                   FloatToRawbits(kFP32NegativeInfinity)};
+
+  uint32_t fmlslt_zd_expected[] = {0xc61c3c00,  // -9999
+                                   0xc0c00000,  // -6
+                                   0xc30b0000,  // -139
+                                   FloatToRawbits(kFP32NegativeInfinity)};
+
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+  START();
+
+  InsrHelper(&masm, z31.VnH(), zn_inputs);
+  InsrHelper(&masm, z30.VnH(), zm_inputs);
+  InsrHelper(&masm, z29.VnS(), za_inputs);
+
+  // Destructive forms, preserving the accumulator in z29 for reuse below.
+  __ Mov(z0, z29);
+  __ Fmlalb(z0.VnS(), z0.VnS(), z31.VnH(), z30.VnH());
+
+  __ Mov(z1, z29);
+  __ Fmlalt(z1.VnS(), z1.VnS(), z31.VnH(), z30.VnH());
+
+  __ Mov(z2, z29);
+  __ Fmlslb(z2.VnS(), z2.VnS(), z31.VnH(), z30.VnH());
+
+  __ Mov(z3, z29);
+  __ Fmlslt(z3.VnS(), z3.VnS(), z31.VnH(), z30.VnH());
+
+  // Non-destructive forms; must match the destructive results above.
+  __ Fmlalb(z4.VnS(), z29.VnS(), z31.VnH(), z30.VnH());
+  __ Fmlalt(z5.VnS(), z29.VnS(), z31.VnH(), z30.VnH());
+  __ Fmlslb(z6.VnS(), z29.VnS(), z31.VnH(), z30.VnH());
+  __ Fmlslt(z7.VnS(), z29.VnS(), z31.VnH(), z30.VnH());
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    ASSERT_EQUAL_SVE(fmlalb_zd_expected, z0.VnS());
+    ASSERT_EQUAL_SVE(fmlalt_zd_expected, z1.VnS());
+    ASSERT_EQUAL_SVE(fmlslb_zd_expected, z2.VnS());
+    ASSERT_EQUAL_SVE(fmlslt_zd_expected, z3.VnS());
+
+    ASSERT_EQUAL_SVE(z4, z0);
+    ASSERT_EQUAL_SVE(z5, z1);
+    ASSERT_EQUAL_SVE(z6, z2);
+    ASSERT_EQUAL_SVE(z7, z3);
+  }
+}
+
+// Test SVE2 Flogb (extract FP exponent as a signed integer) on S- and
+// D-sized elements, covering small integers, zero, division-by-zero results
+// and values either side of the double subnormal/normal boundary.
+TEST_SVE(sve2_flogb_simple) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2);
+
+  START();
+  __ Ptrue(p0.VnB());
+  __ Index(z0.VnS(), -4, 1);
+  __ Mov(z1.VnS(), 0);
+  // Largest subnormal and smallest normal double, as raw bits.
+  __ Mov(z2.VnD(), 0x000fffffffffffff);
+  __ Mov(z3.VnD(), 0x0010000000000000);
+  __ Scvtf(z0.VnS(), p0.Merging(), z0.VnS());
+  __ Scvtf(z1.VnS(), p0.Merging(), z1.VnS());
+  // z1 = z0 / 0, producing infinities (and NaN for the 0/0 lane).
+  __ Fdiv(z1.VnS(), p0.Merging(), z0.VnS(), z1.VnS());
+  __ Flogb(z0.VnS(), p0.Merging(), z0.VnS());
+  __ Flogb(z1.VnS(), p0.Merging(), z1.VnS());
+  __ Flogb(z2.VnD(), p0.Merging(), z2.VnD());
+  __ Flogb(z3.VnD(), p0.Merging(), z3.VnD());
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    // Exponents of -4..7; the zero input lane yields INT32_MIN (0x80000000).
+    uint64_t expected_z0[] = {0x0000000200000002,
+                              0x0000000200000002,
+                              0x0000000100000001,
+                              0x0000000080000000,
+                              0x0000000000000001,
+                              0x0000000100000002};
+    ASSERT_EQUAL_SVE(expected_z0, z0.VnD());
+
+    // Infinite inputs yield INT32_MAX (0x7fffffff); the 0/0 lane is the
+    // exception, again yielding 0x80000000.
+    uint64_t expected_z1[] = {0x7fffffff7fffffff,
+                              0x7fffffff7fffffff,
+                              0x7fffffff7fffffff,
+                              0x7fffffff80000000,
+                              0x7fffffff7fffffff,
+                              0x7fffffff7fffffff};
+    ASSERT_EQUAL_SVE(expected_z1, z1.VnD());
+
+    uint64_t expected_z2[] = {0xfffffffffffffc01,
+                              0xfffffffffffffc01,
+                              0xfffffffffffffc01,
+                              0xfffffffffffffc01};
+    ASSERT_EQUAL_SVE(expected_z2, z2.VnD());
+
+    uint64_t expected_z3[] = {0xfffffffffffffc02,
+                              0xfffffffffffffc02,
+                              0xfffffffffffffc02,
+                              0xfffffffffffffc02};
+    ASSERT_EQUAL_SVE(expected_z3, z3.VnD());
+  }
+}
+
+// Cross-check the NEON 8-bit integer matrix-multiply instructions
+// (Smmla/Ummla/Usmmla) against their SVE equivalents on the same 128-bit
+// inputs; accumulators start at zero in both cases.
+TEST_SVE(neon_matmul) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVEI8MM,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kI8MM);
+
+  // Test Neon integer matrix multiply against SVE.
+  START();
+  __ Movi(v0.V2D(), 0xffeeddccbbaa9988, 0x77665544332211);
+  __ Movi(v1.V2D(), 0xaa5555aa55555555, 0x55aaaa55aaaaaa);
+  __ Movi(v2.V2D(), 0, 0);
+  __ Movi(v3.V2D(), 0, 0);
+  __ Movi(v4.V2D(), 0, 0);
+  __ Movi(v5.V2D(), 0, 0);
+  __ Movi(v6.V2D(), 0, 0);
+  __ Movi(v7.V2D(), 0, 0);
+
+  __ Smmla(v2.V4S(), v0.V16B(), v1.V16B());
+  __ Smmla(z3.VnS(), z3.VnS(), z0.VnB(), z1.VnB());
+  __ Ummla(v4.V4S(), v0.V16B(), v1.V16B());
+  __ Ummla(z5.VnS(), z5.VnS(), z0.VnB(), z1.VnB());
+  __ Usmmla(v6.V4S(), v0.V16B(), v1.V16B());
+  __ Usmmla(z7.VnS(), z7.VnS(), z0.VnB(), z1.VnB());
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    // The inputs as Z registers are zero beyond the least-significant 128 bits,
+    // so the Neon and SVE results should be equal for any VL.
+    ASSERT_EQUAL_SVE(z3, z2);
+    ASSERT_EQUAL_SVE(z5, z4);
+    ASSERT_EQUAL_SVE(z7, z6);
+  }
+}
+
+// Test the SVE mixed-sign dot product instructions (Usdot/Sudot) against
+// Udot/Sdot on non-negative inputs, against hand-picked vectors whose dot
+// product is zero, and the indexed forms against the vector forms.
+TEST_SVE(sudot_usdot) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kSVEI8MM);
+
+  START();
+  __ Ptrue(p0.VnB());
+  __ Index(z0.VnS(), -424242, 77777);
+  // Sqabs makes z1/z2 non-negative, so signed and unsigned interpretations
+  // of those bytes agree.
+  __ Index(z1.VnB(), 127, -1);
+  __ Sqabs(z1.VnB(), p0.Merging(), z1.VnB());
+  __ Index(z2.VnB(), 0, 1);
+  __ Sqabs(z2.VnB(), p0.Merging(), z2.VnB());
+  __ Index(z3.VnB(), -128, 1);
+  __ Mov(z4.VnD(), 0);
+
+  // Test Usdot against Udot/Sdot over the range of inputs where they should be
+  // equal.
+  __ Usdot(z5.VnS(), z0.VnS(), z1.VnB(), z2.VnB());
+  __ Udot(z6.VnS(), z0.VnS(), z1.VnB(), z2.VnB());
+  __ Usdot(z7.VnS(), z0.VnS(), z1.VnB(), z3.VnB());
+  __ Sdot(z8.VnS(), z0.VnS(), z1.VnB(), z3.VnB());
+
+  // Construct values which, when interpreted correctly as signed/unsigned,
+  // should give a zero result for dot product.
+  __ Mov(z10.VnS(), 0x8101ff40);  // [-127, 1, -1, 64] as signed bytes.
+  __ Mov(z11.VnS(), 0x02fe8002);  // [2, 254, 128, 2] as unsigned bytes.
+  __ Usdot(z12.VnS(), z4.VnS(), z11.VnB(), z10.VnB());
+  __ Usdot(z13.VnS(), z4.VnS(), z10.VnB(), z11.VnB());
+
+  // Construct a vector with duplicated values across segments. This allows
+  // testing indexed dot product against the already tested variant.
+  __ Mov(z14.VnS(), 1);
+  __ Mul(z15.VnS(), z14.VnS(), z3.VnS(), 1);
+
+  __ Usdot(z16.VnS(), z0.VnS(), z3.VnB(), z3.VnB(), 1);
+  __ Usdot(z17.VnS(), z0.VnS(), z3.VnB(), z15.VnB());
+  __ Sudot(z18.VnS(), z0.VnS(), z3.VnB(), z3.VnB(), 1);
+  __ Usdot(z19.VnS(), z0.VnS(), z15.VnB(), z3.VnB());
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+    ASSERT_EQUAL_SVE(z6, z5);
+    ASSERT_EQUAL_SVE(z8, z7);
+    ASSERT_EQUAL_SVE(z4, z12);
+
+    uint64_t z13_expected[] = {0xffff8200ffff8200, 0xffff8200ffff8200};
+    ASSERT_EQUAL_SVE(z13_expected, z13.VnD());
+
+    ASSERT_EQUAL_SVE(z17, z16);
+    ASSERT_EQUAL_SVE(z19, z18);
+  }
+}
+
+// Manually constructed simulator test to avoid creating a VL128 variant.
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+// Test SVE FP64 matrix multiply (Fmmla). This is a manually constructed
+// simulator test run at specific VLs (see test_sve_fmatmul_list below);
+// the instruction requires at least 256-bit vectors, so VL128 is skipped.
+void Testsve_fmatmul(Test* config) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM);
+
+  // Only double-precision matrix multiply is tested here. Single-precision is
+  // tested in the simulator tests using a generated sequence. The (templated)
+  // code used in the simulator for both cases is the same, which is why the
+  // tests here don't need to be comprehensive.
+  START();
+  Label vl_too_short;
+  __ Rdvl(x0, 1);
+  __ Cmp(x0, 32);
+  __ B(lt, &vl_too_short);  // Skip testing VL128.
+
+  __ Fdup(z0.VnD(), 1.0);
+  __ Fdup(z1.VnD(), 2.0);
+  __ Mov(z2.VnD(), 0);
+
+  // Build 2x2 identity matrix in z3.
+  Label iden_loop;
+  __ Lsr(x0, x0, 5);
+  __ Bind(&iden_loop);
+  __ Insr(z3.VnD(), d0);
+  __ Insr(z3.VnD(), d2);
+  __ Insr(z3.VnD(), d2);
+  __ Insr(z3.VnD(), d0);
+  __ Sub(x0, x0, 1);
+  __ Cbnz(x0, &iden_loop);
+
+  // Multiplying by the identity (z3) should leave the accumulated result
+  // unchanged, so z2 must end up equal to z1 (and z5 equal to z4).
+  __ Fmmla(z1.VnD(), z1.VnD(), z0.VnD(), z0.VnD());
+  __ Fmmla(z2.VnD(), z2.VnD(), z1.VnD(), z3.VnD());
+
+  __ Ptrue(p0.VnB());
+  __ Index(z4.VnD(), -8, 3);
+  __ Scvtf(z4.VnD(), p0.Merging(), z4.VnD());
+  __ Mov(z5.VnD(), 0);
+  __ Fmmla(z4.VnD(), z4.VnD(), z4.VnD(), z4.VnD());
+  __ Fmmla(z5.VnD(), z5.VnD(), z4.VnD(), z3.VnD());
+
+  __ Bind(&vl_too_short);
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    int vl = core.GetSVELaneCount(kBRegSize) * 8;
+    if (vl >= 256) {
+      ASSERT_EQUAL_SVE(z1, z2);
+      ASSERT_EQUAL_SVE(z4, z5);
+
+      switch (vl) {
+        case 256:
+        case 384: {
+          // All results are 4.0 (1 * 1 + 2). Results for elements beyond a VL
+          // that's a multiple of 256 bits should be zero.
+          uint64_t z1_expected[] = {0x0000000000000000,
+                                    0x0000000000000000,
+                                    0x4010000000000000,
+                                    0x4010000000000000,
+                                    0x4010000000000000,
+                                    0x4010000000000000};
+          ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
+
+          uint64_t z4_expected[] = {0x0000000000000000,
+                                    0x0000000000000000,
+                                    0x4018000000000000,   // 6.0
+                                    0x4022000000000000,   // 9.0
+                                    0x4018000000000000,   // 6.0
+                                    0x4054400000000000};  // 81.0
+          ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
+          break;
+        }
+        case 2048: {
+          uint64_t z1_expected[] =
+              {0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000, 0x4010000000000000,
+               0x4010000000000000, 0x4010000000000000};
+          ASSERT_EQUAL_SVE(z1_expected, z1.VnD());
+
+          uint64_t z4_expected[] = {
+              0x40cb690000000000, 0x40c9728000000000, 0x40c9710000000000,
+              0x40c79e8000000000, 0x40c41f0000000000, 0x40c2708000000000,
+              0x40c26f0000000000, 0x40c0e48000000000, 0x40bbea0000000000,
+              0x40b91d0000000000, 0x40b91a0000000000, 0x40b6950000000000,
+              0x40b1d60000000000, 0x40af320000000000, 0x40af2c0000000000,
+              0x40ab420000000000, 0x40a4040000000000, 0x40a0aa0000000000,
+              0x40a0a40000000000, 0x409bb40000000000, 0x4091b80000000000,
+              0x408a880000000000, 0x408a700000000000, 0x4083c80000000000,
+              0x4071a00000000000, 0x4061a00000000000, 0x4061400000000000,
+              0x4051400000000000, 0x4018000000000000, 0x4022000000000000,
+              0x4018000000000000, 0x4054400000000000,
+          };
+          ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
+          break;
+        }
+        default:
+          printf("WARNING: Some tests skipped due to unexpected VL.\n");
+          break;
+      }
+    }
+  }
+}
+// Register the fixed-VL simulator variants of Testsve_fmatmul.
+Test* test_sve_fmatmul_list[] =
+    {Test::MakeSVETest(256, "AARCH64_ASM_sve_fmatmul_vl256", &Testsve_fmatmul),
+     Test::MakeSVETest(384, "AARCH64_ASM_sve_fmatmul_vl384", &Testsve_fmatmul),
+     Test::MakeSVETest(2048,
+                       "AARCH64_ASM_sve_fmatmul_vl2048",
+                       &Testsve_fmatmul)};
+
+// Test the SVE F64MM 256-bit replicating loads (Ld1rob/Ld1roh/Ld1row/
+// Ld1rod), checking immediate-offset against register-offset addressing and
+// that every 256-bit segment of the result is identical. Manually
+// constructed simulator test run at specific VLs (see test_sve_ld1ro_list).
+void Testsve_ld1ro(Test* config) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM);
+  START();
+
+  // Fill a buffer with known bytes to load from.
+  int data_size = (kQRegSizeInBytes + 128) * 4;
+  uint8_t* data = new uint8_t[data_size];
+  for (int i = 0; i < data_size; i++) {
+    data[i] = i & 0xff;
+  }
+
+  // Set the base to just past half-way through the buffer so we can use
+  // negative indices.
+  __ Mov(x0, reinterpret_cast<uintptr_t>(&data[7 + data_size / 2]));
+
+  __ Index(z0.VnB(), 0, 1);
+  __ Ptrue(p0.VnB());
+  __ Cmplo(p0.VnB(), p0.Zeroing(), z0.VnB(), 4);
+  __ Pfalse(p1.VnB());
+  __ Zip1(p1.VnB(), p0.VnB(), p1.VnB());
+  __ Ptrue(p2.VnB());
+
+  // For each element size, load with an immediate offset and with an
+  // equivalent register offset; the pairs must match.
+  __ Mov(x1, -32);
+  __ Ld1rob(z0.VnB(), p1.Zeroing(), SVEMemOperand(x0, -32));
+  __ Ld1rob(z1.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1));
+
+  __ Mov(x1, 64 / 2);
+  __ Ld1roh(z2.VnH(), p2.Zeroing(), SVEMemOperand(x0, 64));
+  __ Ld1roh(z3.VnH(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 1));
+
+  __ Mov(x1, -96 / 4);
+  __ Ld1row(z4.VnS(), p2.Zeroing(), SVEMemOperand(x0, -96));
+  __ Ld1row(z5.VnS(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 2));
+
+  __ Mov(x1, 128 / 8);
+  __ Ld1rod(z6.VnD(), p2.Zeroing(), SVEMemOperand(x0, 128));
+  __ Ld1rod(z7.VnD(), p2.Zeroing(), SVEMemOperand(x0, x1, LSL, 3));
+
+  // Check that all 256-bit segments match by rotating the vector by one
+  // segment, eoring, and orring across the vector.
+  __ Dup(z11.VnQ(), z0.VnQ(), 2);
+  __ Mov(z8, z0);
+  __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32);
+  __ Eor(z8.VnB(), z8.VnB(), z0.VnB());
+  __ Orv(b9, p2, z8.VnB());
+
+  __ Mov(z8, z2);
+  __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32);
+  __ Eor(z8.VnB(), z8.VnB(), z2.VnB());
+  __ Orv(b8, p2, z8.VnB());
+  __ Orr(z9, z9, z8);
+
+  __ Mov(z8, z4);
+  __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32);
+  __ Eor(z8.VnB(), z8.VnB(), z4.VnB());
+  __ Orv(b8, p2, z8.VnB());
+  __ Orr(z9, z9, z8);
+
+  __ Mov(z8, z6);
+  __ Ext(z8.VnB(), z8.VnB(), z8.VnB(), 32);
+  __ Eor(z8.VnB(), z8.VnB(), z6.VnB());
+  __ Orv(b8, p2, z8.VnB());
+  __ Orr(z9, z9, z8);
+
+  END();
+
+  if (CAN_RUN()) {
+    RUN();
+
+    int vl = core.GetSVELaneCount(kBRegSize) * 8;
+    if (vl >= 256) {
+      ASSERT_EQUAL_SVE(z0, z1);
+      ASSERT_EQUAL_SVE(z2, z3);
+      ASSERT_EQUAL_SVE(z4, z5);
+      ASSERT_EQUAL_SVE(z6, z7);
+
+      switch (vl) {
+        case 256:
+        case 2048: {
+          // Check the result of the rotate/eor sequence.
+          uint64_t expected_z9[] = {0, 0};
+          ASSERT_EQUAL_SVE(expected_z9, z9.VnD());
+          break;
+        }
+        case 384: {
+          // For non-multiple-of-256 VL, the top 128-bits must be zero, which
+          // breaks the rotate/eor sequence. Check the results explicitly.
+          uint64_t z0_expected[] = {0x0000000000000000,
+                                    0x0000000000000000,
+                                    0x0000000000000000,
+                                    0x0000000000000000,
+                                    0x0000000000000000,
+                                    0x000d000b00090007};
+          uint64_t z2_expected[] = {0x0000000000000000,
+                                    0x0000000000000000,
+                                    0x868584838281807f,
+                                    0x7e7d7c7b7a797877,
+                                    0x767574737271706f,
+                                    0x6e6d6c6b6a696867};
+          uint64_t z4_expected[] = {0x0000000000000000,
+                                    0x0000000000000000,
+                                    0xe6e5e4e3e2e1e0df,
+                                    0xdedddcdbdad9d8d7,
+                                    0xd6d5d4d3d2d1d0cf,
+                                    0xcecdcccbcac9c8c7};
+          uint64_t z6_expected[] = {0x0000000000000000,
+                                    0x0000000000000000,
+                                    0xc6c5c4c3c2c1c0bf,
+                                    0xbebdbcbbbab9b8b7,
+                                    0xb6b5b4b3b2b1b0af,
+                                    0xaeadacabaaa9a8a7};
+          ASSERT_EQUAL_SVE(z0_expected, z0.VnD());
+          ASSERT_EQUAL_SVE(z2_expected, z2.VnD());
+          ASSERT_EQUAL_SVE(z4_expected, z4.VnD());
+          ASSERT_EQUAL_SVE(z6_expected, z6.VnD());
+          break;
+        }
+        default:
+          printf("WARNING: Some tests skipped due to unexpected VL.\n");
+          break;
+      }
+    }
+  }
+
+  // Release the buffer; it was previously leaked.
+  delete[] data;
+}
+// Register the fixed-VL simulator variants of Testsve_ld1ro.
+Test* test_sve_ld1ro_list[] =
+    {Test::MakeSVETest(256, "AARCH64_ASM_sve_ld1ro_vl256", &Testsve_ld1ro),
+     Test::MakeSVETest(384, "AARCH64_ASM_sve_ld1ro_vl384", &Testsve_ld1ro),
+     Test::MakeSVETest(2048, "AARCH64_ASM_sve_ld1ro_vl2048", &Testsve_ld1ro)};
+#endif
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 4c5f5951..553168c8 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -133,6 +133,15 @@ TEST(move_immediate) {
COMPARE(movn(x19, 0x5555, 32), "mov x19, #0xffffaaaaffffffff");
COMPARE(movn(x20, 0xaaaa, 48), "mov x20, #0x5555ffffffffffff");
+ COMPARE(mov(w14, 0x1234), "mov w14, #0x1234");
+ COMPARE(mov(x15, 0xabcd0000), "mov x15, #0xabcd0000");
+ COMPARE(mov(x16, 0xaaaa000000000000), "mov x16, #0xaaaa000000000000");
+ COMPARE(mov(w17, 0xaaaaffff), "mov w17, #0xaaaaffff");
+ COMPARE(mov(x18, 0xffffaaaaffffffff), "mov x18, #0xffffaaaaffffffff");
+ COMPARE(mov(x19, 0xffffffffffffffff), "mov x19, #0xffffffffffffffff");
+ COMPARE(mov(x20, 0xc001c001c001c001), "mov x20, #0xc001c001c001c001");
+ COMPARE(mov(sp, 0xfefefefefefefefe), "mov sp, #0xfefefefefefefefe");
+
COMPARE(movk(w21, 0), "movk w21, #0x0");
COMPARE(movk(x22, 0, 0), "movk x22, #0x0");
COMPARE(movk(w23, 0, 16), "movk w23, #0x0, lsl #16");
@@ -560,6 +569,19 @@ TEST(bitfield) {
CLEANUP();
}
+TEST(bitfield_regression_test) {
+ SETUP();
+
+ COMPARE(dci(0x533ae450), "unallocated (Unallocated)");
+ COMPARE(dci(0x133c464c), "unallocated (Unallocated)");
+ COMPARE(dci(0x133c4e6d), "unallocated (Unallocated)");
+ COMPARE(dci(0x133c5e45), "unallocated (Unallocated)");
+ COMPARE(dci(0x1335853c), "unallocated (Unallocated)");
+ COMPARE(dci(0x1335a73d), "unallocated (Unallocated)");
+
+ CLEANUP();
+}
+
TEST(crc32b) {
SETUP();
@@ -2919,8 +2941,8 @@ TEST(barriers) {
COMPARE_MACRO(Dsb(FullSystem, BarrierOther), "dsb sy (0b1100)");
COMPARE_MACRO(Dsb(InnerShareable, BarrierOther), "dsb sy (0b1000)");
- COMPARE_MACRO(Dsb(NonShareable, BarrierOther), "dsb sy (0b0100)");
- COMPARE_MACRO(Dsb(OuterShareable, BarrierOther), "dsb sy (0b0000)");
+ COMPARE_MACRO(Dsb(NonShareable, BarrierOther), "pssbb");
+ COMPARE_MACRO(Dsb(OuterShareable, BarrierOther), "ssbb");
// ISB
COMPARE_MACRO(Isb(), "isb");
@@ -3021,7 +3043,7 @@ TEST(hint) {
COMPARE(hint(WFI), "wfi");
COMPARE(hint(SEV), "sev");
COMPARE(hint(SEVL), "sevl");
- COMPARE(hint(6), "hint #6");
+ COMPARE(hint(6), "dgh");
COMPARE(hint(ESB), "esb");
COMPARE(hint(CSDB), "csdb");
COMPARE(hint(42), "hint #42");
@@ -3077,5 +3099,634 @@ TEST(udf) {
CLEANUP();
}
+TEST(architecture_features) {
+ SETUP();
+
+ // ARMv8.1 - LOR
+ COMPARE_PREFIX(dci(0x08800000), "stllrb"); // STLLRB_SL32_ldstexcl
+ COMPARE_PREFIX(dci(0x08c00000), "ldlarb"); // LDLARB_LR32_ldstexcl
+ COMPARE_PREFIX(dci(0x48800000), "stllrh"); // STLLRH_SL32_ldstexcl
+ COMPARE_PREFIX(dci(0x48c00000), "ldlarh"); // LDLARH_LR32_ldstexcl
+ COMPARE_PREFIX(dci(0x88800000), "stllr"); // STLLR_SL32_ldstexcl
+ COMPARE_PREFIX(dci(0x88c00000), "ldlar"); // LDLAR_LR32_ldstexcl
+ COMPARE_PREFIX(dci(0xc8800000), "stllr"); // STLLR_SL64_ldstexcl
+ COMPARE_PREFIX(dci(0xc8c00000), "ldlar"); // LDLAR_LR64_ldstexcl
+
+ // ARMv8.1 - LSE
+ COMPARE_PREFIX(dci(0x08207c00), "casp"); // CASP_CP32_ldstexcl
+ COMPARE_PREFIX(dci(0x0820fc00), "caspl"); // CASPL_CP32_ldstexcl
+ COMPARE_PREFIX(dci(0x08607c00), "caspa"); // CASPA_CP32_ldstexcl
+ COMPARE_PREFIX(dci(0x0860fc00), "caspal"); // CASPAL_CP32_ldstexcl
+ COMPARE_PREFIX(dci(0x08a07c00), "casb"); // CASB_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x08a0fc00), "caslb"); // CASLB_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x08e07c00), "casab"); // CASAB_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x08e0fc00), "casalb"); // CASALB_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x38200000), "ldaddb"); // LDADDB_32_memop
+ COMPARE_PREFIX(dci(0x38201000), "ldclrb"); // LDCLRB_32_memop
+ COMPARE_PREFIX(dci(0x38202000), "ldeorb"); // LDEORB_32_memop
+ COMPARE_PREFIX(dci(0x38203000), "ldsetb"); // LDSETB_32_memop
+ COMPARE_PREFIX(dci(0x38204000), "ldsmaxb"); // LDSMAXB_32_memop
+ COMPARE_PREFIX(dci(0x38205000), "ldsminb"); // LDSMINB_32_memop
+ COMPARE_PREFIX(dci(0x38206000), "ldumaxb"); // LDUMAXB_32_memop
+ COMPARE_PREFIX(dci(0x38207000), "lduminb"); // LDUMINB_32_memop
+ COMPARE_PREFIX(dci(0x38208000), "swpb"); // SWPB_32_memop
+ COMPARE_PREFIX(dci(0x38600000), "ldaddlb"); // LDADDLB_32_memop
+ COMPARE_PREFIX(dci(0x38601000), "ldclrlb"); // LDCLRLB_32_memop
+ COMPARE_PREFIX(dci(0x38602000), "ldeorlb"); // LDEORLB_32_memop
+ COMPARE_PREFIX(dci(0x38603000), "ldsetlb"); // LDSETLB_32_memop
+ COMPARE_PREFIX(dci(0x38604000), "ldsmaxlb"); // LDSMAXLB_32_memop
+ COMPARE_PREFIX(dci(0x38605000), "ldsminlb"); // LDSMINLB_32_memop
+ COMPARE_PREFIX(dci(0x38606000), "ldumaxlb"); // LDUMAXLB_32_memop
+ COMPARE_PREFIX(dci(0x38607000), "lduminlb"); // LDUMINLB_32_memop
+ COMPARE_PREFIX(dci(0x38608000), "swplb"); // SWPLB_32_memop
+ COMPARE_PREFIX(dci(0x38a00000), "ldaddab"); // LDADDAB_32_memop
+ COMPARE_PREFIX(dci(0x38a01000), "ldclrab"); // LDCLRAB_32_memop
+ COMPARE_PREFIX(dci(0x38a02000), "ldeorab"); // LDEORAB_32_memop
+ COMPARE_PREFIX(dci(0x38a03000), "ldsetab"); // LDSETAB_32_memop
+ COMPARE_PREFIX(dci(0x38a04000), "ldsmaxab"); // LDSMAXAB_32_memop
+ COMPARE_PREFIX(dci(0x38a05000), "ldsminab"); // LDSMINAB_32_memop
+ COMPARE_PREFIX(dci(0x38a06000), "ldumaxab"); // LDUMAXAB_32_memop
+ COMPARE_PREFIX(dci(0x38a07000), "lduminab"); // LDUMINAB_32_memop
+ COMPARE_PREFIX(dci(0x38a08000), "swpab"); // SWPAB_32_memop
+ COMPARE_PREFIX(dci(0x38e00000), "ldaddalb"); // LDADDALB_32_memop
+ COMPARE_PREFIX(dci(0x38e01000), "ldclralb"); // LDCLRALB_32_memop
+ COMPARE_PREFIX(dci(0x38e02000), "ldeoralb"); // LDEORALB_32_memop
+ COMPARE_PREFIX(dci(0x38e03000), "ldsetalb"); // LDSETALB_32_memop
+ COMPARE_PREFIX(dci(0x38e04000), "ldsmaxalb"); // LDSMAXALB_32_memop
+ COMPARE_PREFIX(dci(0x38e05000), "ldsminalb"); // LDSMINALB_32_memop
+ COMPARE_PREFIX(dci(0x38e06000), "ldumaxalb"); // LDUMAXALB_32_memop
+ COMPARE_PREFIX(dci(0x38e07000), "lduminalb"); // LDUMINALB_32_memop
+ COMPARE_PREFIX(dci(0x38e08000), "swpalb"); // SWPALB_32_memop
+ COMPARE_PREFIX(dci(0x48207c00), "casp"); // CASP_CP64_ldstexcl
+ COMPARE_PREFIX(dci(0x4820fc00), "caspl"); // CASPL_CP64_ldstexcl
+ COMPARE_PREFIX(dci(0x48607c00), "caspa"); // CASPA_CP64_ldstexcl
+ COMPARE_PREFIX(dci(0x4860fc00), "caspal"); // CASPAL_CP64_ldstexcl
+ COMPARE_PREFIX(dci(0x48a07c00), "cash"); // CASH_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x48a0fc00), "caslh"); // CASLH_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x48e07c00), "casah"); // CASAH_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x48e0fc00), "casalh"); // CASALH_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x78200000), "ldaddh"); // LDADDH_32_memop
+ COMPARE_PREFIX(dci(0x78201000), "ldclrh"); // LDCLRH_32_memop
+ COMPARE_PREFIX(dci(0x78202000), "ldeorh"); // LDEORH_32_memop
+ COMPARE_PREFIX(dci(0x78203000), "ldseth"); // LDSETH_32_memop
+ COMPARE_PREFIX(dci(0x78204000), "ldsmaxh"); // LDSMAXH_32_memop
+ COMPARE_PREFIX(dci(0x78205000), "ldsminh"); // LDSMINH_32_memop
+ COMPARE_PREFIX(dci(0x78206000), "ldumaxh"); // LDUMAXH_32_memop
+ COMPARE_PREFIX(dci(0x78207000), "lduminh"); // LDUMINH_32_memop
+ COMPARE_PREFIX(dci(0x78208000), "swph"); // SWPH_32_memop
+ COMPARE_PREFIX(dci(0x78600000), "ldaddlh"); // LDADDLH_32_memop
+ COMPARE_PREFIX(dci(0x78601000), "ldclrlh"); // LDCLRLH_32_memop
+ COMPARE_PREFIX(dci(0x78602000), "ldeorlh"); // LDEORLH_32_memop
+ COMPARE_PREFIX(dci(0x78603000), "ldsetlh"); // LDSETLH_32_memop
+ COMPARE_PREFIX(dci(0x78604000), "ldsmaxlh"); // LDSMAXLH_32_memop
+ COMPARE_PREFIX(dci(0x78605000), "ldsminlh"); // LDSMINLH_32_memop
+ COMPARE_PREFIX(dci(0x78606000), "ldumaxlh"); // LDUMAXLH_32_memop
+ COMPARE_PREFIX(dci(0x78607000), "lduminlh"); // LDUMINLH_32_memop
+ COMPARE_PREFIX(dci(0x78608000), "swplh"); // SWPLH_32_memop
+ COMPARE_PREFIX(dci(0x78a00000), "ldaddah"); // LDADDAH_32_memop
+ COMPARE_PREFIX(dci(0x78a01000), "ldclrah"); // LDCLRAH_32_memop
+ COMPARE_PREFIX(dci(0x78a02000), "ldeorah"); // LDEORAH_32_memop
+ COMPARE_PREFIX(dci(0x78a03000), "ldsetah"); // LDSETAH_32_memop
+ COMPARE_PREFIX(dci(0x78a04000), "ldsmaxah"); // LDSMAXAH_32_memop
+ COMPARE_PREFIX(dci(0x78a05000), "ldsminah"); // LDSMINAH_32_memop
+ COMPARE_PREFIX(dci(0x78a06000), "ldumaxah"); // LDUMAXAH_32_memop
+ COMPARE_PREFIX(dci(0x78a07000), "lduminah"); // LDUMINAH_32_memop
+ COMPARE_PREFIX(dci(0x78a08000), "swpah"); // SWPAH_32_memop
+ COMPARE_PREFIX(dci(0x78e00000), "ldaddalh"); // LDADDALH_32_memop
+ COMPARE_PREFIX(dci(0x78e01000), "ldclralh"); // LDCLRALH_32_memop
+ COMPARE_PREFIX(dci(0x78e02000), "ldeoralh"); // LDEORALH_32_memop
+ COMPARE_PREFIX(dci(0x78e03000), "ldsetalh"); // LDSETALH_32_memop
+ COMPARE_PREFIX(dci(0x78e04000), "ldsmaxalh"); // LDSMAXALH_32_memop
+ COMPARE_PREFIX(dci(0x78e05000), "ldsminalh"); // LDSMINALH_32_memop
+ COMPARE_PREFIX(dci(0x78e06000), "ldumaxalh"); // LDUMAXALH_32_memop
+ COMPARE_PREFIX(dci(0x78e07000), "lduminalh"); // LDUMINALH_32_memop
+ COMPARE_PREFIX(dci(0x78e08000), "swpalh"); // SWPALH_32_memop
+ COMPARE_PREFIX(dci(0x88a07c00), "cas"); // CAS_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x88a0fc00), "casl"); // CASL_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x88e07c00), "casa"); // CASA_C32_ldstexcl
+ COMPARE_PREFIX(dci(0x88e0fc00), "casal"); // CASAL_C32_ldstexcl
+ COMPARE_PREFIX(dci(0xb8200000), "ldadd"); // LDADD_32_memop
+ COMPARE_PREFIX(dci(0xb8201000), "ldclr"); // LDCLR_32_memop
+ COMPARE_PREFIX(dci(0xb8202000), "ldeor"); // LDEOR_32_memop
+ COMPARE_PREFIX(dci(0xb8203000), "ldset"); // LDSET_32_memop
+ COMPARE_PREFIX(dci(0xb8204000), "ldsmax"); // LDSMAX_32_memop
+ COMPARE_PREFIX(dci(0xb8205000), "ldsmin"); // LDSMIN_32_memop
+ COMPARE_PREFIX(dci(0xb8206000), "ldumax"); // LDUMAX_32_memop
+ COMPARE_PREFIX(dci(0xb8207000), "ldumin"); // LDUMIN_32_memop
+ COMPARE_PREFIX(dci(0xb8208000), "swp"); // SWP_32_memop
+ COMPARE_PREFIX(dci(0xb8600000), "ldaddl"); // LDADDL_32_memop
+ COMPARE_PREFIX(dci(0xb8601000), "ldclrl"); // LDCLRL_32_memop
+ COMPARE_PREFIX(dci(0xb8602000), "ldeorl"); // LDEORL_32_memop
+ COMPARE_PREFIX(dci(0xb8603000), "ldsetl"); // LDSETL_32_memop
+ COMPARE_PREFIX(dci(0xb8604000), "ldsmaxl"); // LDSMAXL_32_memop
+ COMPARE_PREFIX(dci(0xb8605000), "ldsminl"); // LDSMINL_32_memop
+ COMPARE_PREFIX(dci(0xb8606000), "ldumaxl"); // LDUMAXL_32_memop
+ COMPARE_PREFIX(dci(0xb8607000), "lduminl"); // LDUMINL_32_memop
+ COMPARE_PREFIX(dci(0xb8608000), "swpl"); // SWPL_32_memop
+ COMPARE_PREFIX(dci(0xb8a00000), "ldadda"); // LDADDA_32_memop
+ COMPARE_PREFIX(dci(0xb8a01000), "ldclra"); // LDCLRA_32_memop
+ COMPARE_PREFIX(dci(0xb8a02000), "ldeora"); // LDEORA_32_memop
+ COMPARE_PREFIX(dci(0xb8a03000), "ldseta"); // LDSETA_32_memop
+ COMPARE_PREFIX(dci(0xb8a04000), "ldsmaxa"); // LDSMAXA_32_memop
+ COMPARE_PREFIX(dci(0xb8a05000), "ldsmina"); // LDSMINA_32_memop
+ COMPARE_PREFIX(dci(0xb8a06000), "ldumaxa"); // LDUMAXA_32_memop
+ COMPARE_PREFIX(dci(0xb8a07000), "ldumina"); // LDUMINA_32_memop
+ COMPARE_PREFIX(dci(0xb8a08000), "swpa"); // SWPA_32_memop
+ COMPARE_PREFIX(dci(0xb8e00000), "ldaddal"); // LDADDAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e01000), "ldclral"); // LDCLRAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e02000), "ldeoral"); // LDEORAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e03000), "ldsetal"); // LDSETAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e04000), "ldsmaxal"); // LDSMAXAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e05000), "ldsminal"); // LDSMINAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e06000), "ldumaxal"); // LDUMAXAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e07000), "lduminal"); // LDUMINAL_32_memop
+ COMPARE_PREFIX(dci(0xb8e08000), "swpal"); // SWPAL_32_memop
+ COMPARE_PREFIX(dci(0xc8a07c00), "cas"); // CAS_C64_ldstexcl
+ COMPARE_PREFIX(dci(0xc8a0fc00), "casl"); // CASL_C64_ldstexcl
+ COMPARE_PREFIX(dci(0xc8e07c00), "casa"); // CASA_C64_ldstexcl
+ COMPARE_PREFIX(dci(0xc8e0fc00), "casal"); // CASAL_C64_ldstexcl
+ COMPARE_PREFIX(dci(0xf8200000), "ldadd"); // LDADD_64_memop
+ COMPARE_PREFIX(dci(0xf8201000), "ldclr"); // LDCLR_64_memop
+ COMPARE_PREFIX(dci(0xf8202000), "ldeor"); // LDEOR_64_memop
+ COMPARE_PREFIX(dci(0xf8203000), "ldset"); // LDSET_64_memop
+ COMPARE_PREFIX(dci(0xf8204000), "ldsmax"); // LDSMAX_64_memop
+ COMPARE_PREFIX(dci(0xf8205000), "ldsmin"); // LDSMIN_64_memop
+ COMPARE_PREFIX(dci(0xf8206000), "ldumax"); // LDUMAX_64_memop
+ COMPARE_PREFIX(dci(0xf8207000), "ldumin"); // LDUMIN_64_memop
+ COMPARE_PREFIX(dci(0xf8208000), "swp"); // SWP_64_memop
+ COMPARE_PREFIX(dci(0xf8600000), "ldaddl"); // LDADDL_64_memop
+ COMPARE_PREFIX(dci(0xf8601000), "ldclrl"); // LDCLRL_64_memop
+ COMPARE_PREFIX(dci(0xf8602000), "ldeorl"); // LDEORL_64_memop
+ COMPARE_PREFIX(dci(0xf8603000), "ldsetl"); // LDSETL_64_memop
+ COMPARE_PREFIX(dci(0xf8604000), "ldsmaxl"); // LDSMAXL_64_memop
+ COMPARE_PREFIX(dci(0xf8605000), "ldsminl"); // LDSMINL_64_memop
+ COMPARE_PREFIX(dci(0xf8606000), "ldumaxl"); // LDUMAXL_64_memop
+ COMPARE_PREFIX(dci(0xf8607000), "lduminl"); // LDUMINL_64_memop
+ COMPARE_PREFIX(dci(0xf8608000), "swpl"); // SWPL_64_memop
+ COMPARE_PREFIX(dci(0xf8a00000), "ldadda"); // LDADDA_64_memop
+ COMPARE_PREFIX(dci(0xf8a01000), "ldclra"); // LDCLRA_64_memop
+ COMPARE_PREFIX(dci(0xf8a02000), "ldeora"); // LDEORA_64_memop
+ COMPARE_PREFIX(dci(0xf8a03000), "ldseta"); // LDSETA_64_memop
+ COMPARE_PREFIX(dci(0xf8a04000), "ldsmaxa"); // LDSMAXA_64_memop
+ COMPARE_PREFIX(dci(0xf8a05000), "ldsmina"); // LDSMINA_64_memop
+ COMPARE_PREFIX(dci(0xf8a06000), "ldumaxa"); // LDUMAXA_64_memop
+ COMPARE_PREFIX(dci(0xf8a07000), "ldumina"); // LDUMINA_64_memop
+ COMPARE_PREFIX(dci(0xf8a08000), "swpa"); // SWPA_64_memop
+ COMPARE_PREFIX(dci(0xf8e00000), "ldaddal"); // LDADDAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e01000), "ldclral"); // LDCLRAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e02000), "ldeoral"); // LDEORAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e03000), "ldsetal"); // LDSETAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e04000), "ldsmaxal"); // LDSMAXAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e05000), "ldsminal"); // LDSMINAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e06000), "ldumaxal"); // LDUMAXAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e07000), "lduminal"); // LDUMINAL_64_memop
+ COMPARE_PREFIX(dci(0xf8e08000), "swpal"); // SWPAL_64_memop
+
+ // ARMv8.1 - RDM
+ COMPARE_PREFIX(dci(0x2e008400), "sqrdmlah"); // SQRDMLAH_asimdsame2_only
+ COMPARE_PREFIX(dci(0x2e008c00), "sqrdmlsh"); // SQRDMLSH_asimdsame2_only
+ COMPARE_PREFIX(dci(0x2f40d000), "sqrdmlah"); // SQRDMLAH_asimdelem_R
+ COMPARE_PREFIX(dci(0x2f40f000), "sqrdmlsh"); // SQRDMLSH_asimdelem_R
+ COMPARE_PREFIX(dci(0x7e008400), "sqrdmlah"); // SQRDMLAH_asisdsame2_only
+ COMPARE_PREFIX(dci(0x7e008c00), "sqrdmlsh"); // SQRDMLSH_asisdsame2_only
+ COMPARE_PREFIX(dci(0x7f40d000), "sqrdmlah"); // SQRDMLAH_asisdelem_R
+ COMPARE_PREFIX(dci(0x7f40f000), "sqrdmlsh"); // SQRDMLSH_asisdelem_R
+
+ // ARMv8.2 - DotProd
+ COMPARE_PREFIX(dci(0x0e009400), "sdot"); // SDOT_asimdsame2_D
+ COMPARE_PREFIX(dci(0x0f00e000), "sdot"); // SDOT_asimdelem_D
+ COMPARE_PREFIX(dci(0x2e009400), "udot"); // UDOT_asimdsame2_D
+ COMPARE_PREFIX(dci(0x2f00e000), "udot"); // UDOT_asimdelem_D
+
+ // ARMv8.2 - FHM
+ COMPARE_PREFIX(dci(0x0e20ec00), "fmlal"); // FMLAL_asimdsame_F
+ COMPARE_PREFIX(dci(0x0ea0ec00), "fmlsl"); // FMLSL_asimdsame_F
+ COMPARE_PREFIX(dci(0x0f800000), "fmlal"); // FMLAL_asimdelem_LH
+ COMPARE_PREFIX(dci(0x0f804000), "fmlsl"); // FMLSL_asimdelem_LH
+ COMPARE_PREFIX(dci(0x2e20cc00), "fmlal2"); // FMLAL2_asimdsame_F
+ COMPARE_PREFIX(dci(0x2ea0cc00), "fmlsl2"); // FMLSL2_asimdsame_F
+ COMPARE_PREFIX(dci(0x2f808000), "fmlal2"); // FMLAL2_asimdelem_LH
+ COMPARE_PREFIX(dci(0x2f80c000), "fmlsl2"); // FMLSL2_asimdelem_LH
+
+ // ARMv8.2 - FP16
+ COMPARE_PREFIX(dci(0x0e20c400), "fmaxnm"); // FMAXNM_asimdsame_only
+ COMPARE_PREFIX(dci(0x0e20cc00), "fmla"); // FMLA_asimdsame_only
+ COMPARE_PREFIX(dci(0x0e20d400), "fadd"); // FADD_asimdsame_only
+ COMPARE_PREFIX(dci(0x0e20dc00), "fmulx"); // FMULX_asimdsame_only
+ COMPARE_PREFIX(dci(0x0e20e400), "fcmeq"); // FCMEQ_asimdsame_only
+ COMPARE_PREFIX(dci(0x0e20f400), "fmax"); // FMAX_asimdsame_only
+ COMPARE_PREFIX(dci(0x0e20fc00), "frecps"); // FRECPS_asimdsame_only
+ COMPARE_PREFIX(dci(0x0e218800), "frintn"); // FRINTN_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0e219800), "frintm"); // FRINTM_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0e21a800), "fcvtns"); // FCVTNS_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0e21b800), "fcvtms"); // FCVTMS_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0e21c800), "fcvtas"); // FCVTAS_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0e21d800), "scvtf"); // SCVTF_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0e30c800), "fmaxnmv"); // FMAXNMV_asimdall_only_H
+ COMPARE_PREFIX(dci(0x0e30f800), "fmaxv"); // FMAXV_asimdall_only_H
+ COMPARE_PREFIX(dci(0x0e400400), "fmaxnm"); // FMAXNM_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0e400c00), "fmla"); // FMLA_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0e401400), "fadd"); // FADD_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0e401c00), "fmulx"); // FMULX_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0e402400), "fcmeq"); // FCMEQ_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0e403400), "fmax"); // FMAX_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0e403c00), "frecps"); // FRECPS_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0e798800), "frintn"); // FRINTN_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0e799800), "frintm"); // FRINTM_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0e79a800), "fcvtns"); // FCVTNS_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0e79b800), "fcvtms"); // FCVTMS_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0e79c800), "fcvtas"); // FCVTAS_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0e79d800), "scvtf"); // SCVTF_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0ea0c400), "fminnm"); // FMINNM_asimdsame_only
+ COMPARE_PREFIX(dci(0x0ea0cc00), "fmls"); // FMLS_asimdsame_only
+ COMPARE_PREFIX(dci(0x0ea0d400), "fsub"); // FSUB_asimdsame_only
+ COMPARE_PREFIX(dci(0x0ea0f400), "fmin"); // FMIN_asimdsame_only
+ COMPARE_PREFIX(dci(0x0ea0fc00), "frsqrts"); // FRSQRTS_asimdsame_only
+ COMPARE_PREFIX(dci(0x0ea0c800), "fcmgt"); // FCMGT_asimdmisc_FZ
+ COMPARE_PREFIX(dci(0x0ea0d800), "fcmeq"); // FCMEQ_asimdmisc_FZ
+ COMPARE_PREFIX(dci(0x0ea0e800), "fcmlt"); // FCMLT_asimdmisc_FZ
+ COMPARE_PREFIX(dci(0x0ea0f800), "fabs"); // FABS_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0ea18800), "frintp"); // FRINTP_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0ea19800), "frintz"); // FRINTZ_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0ea1a800), "fcvtps"); // FCVTPS_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0ea1b800), "fcvtzs"); // FCVTZS_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0ea1d800), "frecpe"); // FRECPE_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0eb0c800), "fminnmv"); // FMINNMV_asimdall_only_H
+ COMPARE_PREFIX(dci(0x0eb0f800), "fminv"); // FMINV_asimdall_only_H
+ COMPARE_PREFIX(dci(0x0ec00400), "fminnm"); // FMINNM_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0ec00c00), "fmls"); // FMLS_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0ec01400), "fsub"); // FSUB_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0ec03400), "fmin"); // FMIN_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0ec03c00), "frsqrts"); // FRSQRTS_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x0ef8c800), "fcmgt"); // FCMGT_asimdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x0ef8d800), "fcmeq"); // FCMEQ_asimdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x0ef8e800), "fcmlt"); // FCMLT_asimdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x0ef8f800), "fabs"); // FABS_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0ef98800), "frintp"); // FRINTP_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0ef99800), "frintz"); // FRINTZ_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0ef9a800), "fcvtps"); // FCVTPS_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0ef9b800), "fcvtzs"); // FCVTZS_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0ef9d800), "frecpe"); // FRECPE_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x0f001000), "fmla"); // FMLA_asimdelem_RH_H
+ COMPARE_PREFIX(dci(0x0f005000), "fmls"); // FMLS_asimdelem_RH_H
+ COMPARE_PREFIX(dci(0x0f009000), "fmul"); // FMUL_asimdelem_RH_H
+ COMPARE_PREFIX(dci(0x0f00f400), "fmov"); // FMOV_asimdimm_S_s
+ COMPARE_PREFIX(dci(0x0f00fc00), "fmov"); // FMOV_asimdimm_H_h
+ COMPARE_PREFIX(dci(0x0f801000), "fmla"); // FMLA_asimdelem_R_SD
+ COMPARE_PREFIX(dci(0x0f805000), "fmls"); // FMLS_asimdelem_R_SD
+ COMPARE_PREFIX(dci(0x0f809000), "fmul"); // FMUL_asimdelem_R_SD
+ COMPARE_PREFIX(dci(0x2e20c400), "fmaxnmp"); // FMAXNMP_asimdsame_only
+ COMPARE_PREFIX(dci(0x2e20d400), "faddp"); // FADDP_asimdsame_only
+ COMPARE_PREFIX(dci(0x2e20dc00), "fmul"); // FMUL_asimdsame_only
+ COMPARE_PREFIX(dci(0x2e20e400), "fcmge"); // FCMGE_asimdsame_only
+ COMPARE_PREFIX(dci(0x2e20ec00), "facge"); // FACGE_asimdsame_only
+ COMPARE_PREFIX(dci(0x2e20f400), "fmaxp"); // FMAXP_asimdsame_only
+ COMPARE_PREFIX(dci(0x2e20fc00), "fdiv"); // FDIV_asimdsame_only
+ COMPARE_PREFIX(dci(0x2e218800), "frinta"); // FRINTA_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2e219800), "frintx"); // FRINTX_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2e21a800), "fcvtnu"); // FCVTNU_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2e21b800), "fcvtmu"); // FCVTMU_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2e21c800), "fcvtau"); // FCVTAU_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2e21d800), "ucvtf"); // UCVTF_asimdmisc_R
+ COMPARE_PREFIX(dci(0x6e30c800), "fmaxnmv"); // FMAXNMV_asimdall_only_SD
+ COMPARE_PREFIX(dci(0x6e30f800), "fmaxv"); // FMAXV_asimdall_only_SD
+ COMPARE_PREFIX(dci(0x2e400400), "fmaxnmp"); // FMAXNMP_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2e401400), "faddp"); // FADDP_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2e401c00), "fmul"); // FMUL_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2e402400), "fcmge"); // FCMGE_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2e402c00), "facge"); // FACGE_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2e403400), "fmaxp"); // FMAXP_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2e403c00), "fdiv"); // FDIV_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2e798800), "frinta"); // FRINTA_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2e799800), "frintx"); // FRINTX_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2e79a800), "fcvtnu"); // FCVTNU_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2e79b800), "fcvtmu"); // FCVTMU_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2e79c800), "fcvtau"); // FCVTAU_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2e79d800), "ucvtf"); // UCVTF_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2ea0c400), "fminnmp"); // FMINNMP_asimdsame_only
+ COMPARE_PREFIX(dci(0x2ea0d400), "fabd"); // FABD_asimdsame_only
+ COMPARE_PREFIX(dci(0x2ea0e400), "fcmgt"); // FCMGT_asimdsame_only
+ COMPARE_PREFIX(dci(0x2ea0ec00), "facgt"); // FACGT_asimdsame_only
+ COMPARE_PREFIX(dci(0x2ea0f400), "fminp"); // FMINP_asimdsame_only
+ COMPARE_PREFIX(dci(0x2ea0c800), "fcmge"); // FCMGE_asimdmisc_FZ
+ COMPARE_PREFIX(dci(0x2ea0d800), "fcmle"); // FCMLE_asimdmisc_FZ
+ COMPARE_PREFIX(dci(0x2ea0f800), "fneg"); // FNEG_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2ea19800), "frinti"); // FRINTI_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2ea1a800), "fcvtpu"); // FCVTPU_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2ea1b800), "fcvtzu"); // FCVTZU_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2ea1d800), "frsqrte"); // FRSQRTE_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2ea1f800), "fsqrt"); // FSQRT_asimdmisc_R
+ COMPARE_PREFIX(dci(0x6eb0c800), "fminnmv"); // FMINNMV_asimdall_only_SD
+ COMPARE_PREFIX(dci(0x6eb0f800), "fminv"); // FMINV_asimdall_only_SD
+ COMPARE_PREFIX(dci(0x2ec00400), "fminnmp"); // FMINNMP_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2ec01400), "fabd"); // FABD_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2ec02400), "fcmgt"); // FCMGT_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2ec02c00), "facgt"); // FACGT_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2ec03400), "fminp"); // FMINP_asimdsamefp16_only
+ COMPARE_PREFIX(dci(0x2ef8c800), "fcmge"); // FCMGE_asimdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x2ef8d800), "fcmle"); // FCMLE_asimdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x2ef8f800), "fneg"); // FNEG_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2ef99800), "frinti"); // FRINTI_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2ef9a800), "fcvtpu"); // FCVTPU_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2ef9b800), "fcvtzu"); // FCVTZU_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2ef9d800), "frsqrte"); // FRSQRTE_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2ef9f800), "fsqrt"); // FSQRT_asimdmiscfp16_R
+ COMPARE_PREFIX(dci(0x2f009000), "fmulx"); // FMULX_asimdelem_RH_H
+ COMPARE_PREFIX(dci(0x2f809000), "fmulx"); // FMULX_asimdelem_R_SD
+ COMPARE_PREFIX(dci(0x5e20dc00), "fmulx"); // FMULX_asisdsame_only
+ COMPARE_PREFIX(dci(0x5e20e400), "fcmeq"); // FCMEQ_asisdsame_only
+ COMPARE_PREFIX(dci(0x5e20fc00), "frecps"); // FRECPS_asisdsame_only
+ COMPARE_PREFIX(dci(0x5e21a800), "fcvtns"); // FCVTNS_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5e21b800), "fcvtms"); // FCVTMS_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5e21c800), "fcvtas"); // FCVTAS_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5e21d800), "scvtf"); // SCVTF_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5e30c800), "fmaxnmp"); // FMAXNMP_asisdpair_only_H
+ COMPARE_PREFIX(dci(0x5e30d800), "faddp"); // FADDP_asisdpair_only_H
+ COMPARE_PREFIX(dci(0x5e30f800), "fmaxp"); // FMAXP_asisdpair_only_H
+ COMPARE_PREFIX(dci(0x5e401c00), "fmulx"); // FMULX_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x5e402400), "fcmeq"); // FCMEQ_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x5e403c00), "frecps"); // FRECPS_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x5e79a800), "fcvtns"); // FCVTNS_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5e79b800), "fcvtms"); // FCVTMS_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5e79c800), "fcvtas"); // FCVTAS_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5e79d800), "scvtf"); // SCVTF_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5ea0fc00), "frsqrts"); // FRSQRTS_asisdsame_only
+ COMPARE_PREFIX(dci(0x5ea0c800), "fcmgt"); // FCMGT_asisdmisc_FZ
+ COMPARE_PREFIX(dci(0x5ea0d800), "fcmeq"); // FCMEQ_asisdmisc_FZ
+ COMPARE_PREFIX(dci(0x5ea0e800), "fcmlt"); // FCMLT_asisdmisc_FZ
+ COMPARE_PREFIX(dci(0x5ea1a800), "fcvtps"); // FCVTPS_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5ea1b800), "fcvtzs"); // FCVTZS_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5ea1d800), "frecpe"); // FRECPE_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5ea1f800), "frecpx"); // FRECPX_asisdmisc_R
+ COMPARE_PREFIX(dci(0x5eb0c800), "fminnmp"); // FMINNMP_asisdpair_only_H
+ COMPARE_PREFIX(dci(0x5eb0f800), "fminp"); // FMINP_asisdpair_only_H
+ COMPARE_PREFIX(dci(0x5ec03c00), "frsqrts"); // FRSQRTS_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x5ef8c800), "fcmgt"); // FCMGT_asisdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x5ef8d800), "fcmeq"); // FCMEQ_asisdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x5ef8e800), "fcmlt"); // FCMLT_asisdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x5ef9a800), "fcvtps"); // FCVTPS_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5ef9b800), "fcvtzs"); // FCVTZS_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5ef9d800), "frecpe"); // FRECPE_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5ef9f800), "frecpx"); // FRECPX_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x5f001000), "fmla"); // FMLA_asisdelem_RH_H
+ COMPARE_PREFIX(dci(0x5f005000), "fmls"); // FMLS_asisdelem_RH_H
+ COMPARE_PREFIX(dci(0x5f009000), "fmul"); // FMUL_asisdelem_RH_H
+ COMPARE_PREFIX(dci(0x5f801000), "fmla"); // FMLA_asisdelem_R_SD
+ COMPARE_PREFIX(dci(0x5f805000), "fmls"); // FMLS_asisdelem_R_SD
+ COMPARE_PREFIX(dci(0x5f809000), "fmul"); // FMUL_asisdelem_R_SD
+ COMPARE_PREFIX(dci(0x6f00f400), "fmov"); // FMOV_asimdimm_D2_d
+ COMPARE_PREFIX(dci(0x7e20e400), "fcmge"); // FCMGE_asisdsame_only
+ COMPARE_PREFIX(dci(0x7e20ec00), "facge"); // FACGE_asisdsame_only
+ COMPARE_PREFIX(dci(0x7e21a800), "fcvtnu"); // FCVTNU_asisdmisc_R
+ COMPARE_PREFIX(dci(0x7e21b800), "fcvtmu"); // FCVTMU_asisdmisc_R
+ COMPARE_PREFIX(dci(0x7e21c800), "fcvtau"); // FCVTAU_asisdmisc_R
+ COMPARE_PREFIX(dci(0x7e21d800), "ucvtf"); // UCVTF_asisdmisc_R
+ COMPARE_PREFIX(dci(0x7e30c800), "fmaxnmp"); // FMAXNMP_asisdpair_only_SD
+ COMPARE_PREFIX(dci(0x7e30d800), "faddp"); // FADDP_asisdpair_only_SD
+ COMPARE_PREFIX(dci(0x7e30f800), "fmaxp"); // FMAXP_asisdpair_only_SD
+ COMPARE_PREFIX(dci(0x7e402400), "fcmge"); // FCMGE_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x7e402c00), "facge"); // FACGE_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x7e79a800), "fcvtnu"); // FCVTNU_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x7e79b800), "fcvtmu"); // FCVTMU_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x7e79c800), "fcvtau"); // FCVTAU_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x7e79d800), "ucvtf"); // UCVTF_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x7ea0d400), "fabd"); // FABD_asisdsame_only
+ COMPARE_PREFIX(dci(0x7ea0e400), "fcmgt"); // FCMGT_asisdsame_only
+ COMPARE_PREFIX(dci(0x7ea0ec00), "facgt"); // FACGT_asisdsame_only
+ COMPARE_PREFIX(dci(0x7ea0c800), "fcmge"); // FCMGE_asisdmisc_FZ
+ COMPARE_PREFIX(dci(0x7ea0d800), "fcmle"); // FCMLE_asisdmisc_FZ
+ COMPARE_PREFIX(dci(0x7ea1a800), "fcvtpu"); // FCVTPU_asisdmisc_R
+ COMPARE_PREFIX(dci(0x7ea1b800), "fcvtzu"); // FCVTZU_asisdmisc_R
+ COMPARE_PREFIX(dci(0x7ea1d800), "frsqrte"); // FRSQRTE_asisdmisc_R
+ COMPARE_PREFIX(dci(0x7eb0c800), "fminnmp"); // FMINNMP_asisdpair_only_SD
+ COMPARE_PREFIX(dci(0x7eb0f800), "fminp"); // FMINP_asisdpair_only_SD
+ COMPARE_PREFIX(dci(0x7ec01400), "fabd"); // FABD_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x7ec02400), "fcmgt"); // FCMGT_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x7ec02c00), "facgt"); // FACGT_asisdsamefp16_only
+ COMPARE_PREFIX(dci(0x7ef8c800), "fcmge"); // FCMGE_asisdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x7ef8d800), "fcmle"); // FCMLE_asisdmiscfp16_FZ
+ COMPARE_PREFIX(dci(0x7ef9a800), "fcvtpu"); // FCVTPU_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x7ef9b800), "fcvtzu"); // FCVTZU_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x7ef9d800), "frsqrte"); // FRSQRTE_asisdmiscfp16_R
+ COMPARE_PREFIX(dci(0x7f009000), "fmulx"); // FMULX_asisdelem_RH_H
+ COMPARE_PREFIX(dci(0x7f809000), "fmulx"); // FMULX_asisdelem_R_SD
+
+ // ARMv8.2 - RAS
+ COMPARE_PREFIX(dci(0xd503221f), "esb"); // ESB_HI_hints
+
+ // ARMv8.2 - SHA3
+ // COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4
+ // COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4
+ // COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3
+ // COMPARE_PREFIX(dci(0xce800000), "xar"); // XAR_VVV2_crypto3_imm6
+
+ // ARMv8.2 - SHA512
+ // COMPARE_PREFIX(dci(0xce608000), "sha512h"); // SHA512H_QQV_cryptosha512_3
+ // COMPARE_PREFIX(dci(0xce608400), "sha512h2"); //
+ // SHA512H2_QQV_cryptosha512_3
+ // COMPARE_PREFIX(dci(0xce608800), "sha512su1"); //
+ // SHA512SU1_VVV2_cryptosha512_3
+ // COMPARE_PREFIX(dci(0xcec08000), "sha512su0"); //
+ // SHA512SU0_VV2_cryptosha512_2
+
+ // ARMv8.2 - SM3
+ // COMPARE_PREFIX(dci(0xce400000), "sm3ss1"); // SM3SS1_VVV4_crypto4
+ // COMPARE_PREFIX(dci(0xce408000), "sm3tt1a"); // SM3TT1A_VVV4_crypto3_imm2
+ // COMPARE_PREFIX(dci(0xce408400), "sm3tt1b"); // SM3TT1B_VVV4_crypto3_imm2
+ // COMPARE_PREFIX(dci(0xce408800), "sm3tt2a"); // SM3TT2A_VVV4_crypto3_imm2
+ // COMPARE_PREFIX(dci(0xce408c00), "sm3tt2b"); // SM3TT2B_VVV_crypto3_imm2
+ // COMPARE_PREFIX(dci(0xce60c000), "sm3partw1"); //
+ // SM3PARTW1_VVV4_cryptosha512_3
+ // COMPARE_PREFIX(dci(0xce60c400), "sm3partw2"); //
+ // SM3PARTW2_VVV4_cryptosha512_3
+
+ // ARMv8.2 - SM4
+ // COMPARE_PREFIX(dci(0xce60c800), "sm4ekey"); //
+ // SM4EKEY_VVV4_cryptosha512_3
+ // COMPARE_PREFIX(dci(0xcec08400), "sm4e"); // SM4E_VV4_cryptosha512_2
+
+ // ARMv8.2 - SPE
+ // COMPARE_PREFIX(dci(0xd503223f), "psb"); // PSB_HC_hints
+
+ // ARMv8.3 - FCMA
+ COMPARE_PREFIX(dci(0x2e40c400), "fcmla"); // FCMLA_asimdsame2_C
+ COMPARE_PREFIX(dci(0x2e00e400), "fcadd"); // FCADD_asimdsame2_C
+ COMPARE_PREFIX(dci(0x2f401000), "fcmla"); // FCMLA_asimdelem_C_H
+ COMPARE_PREFIX(dci(0x6f801000), "fcmla"); // FCMLA_asimdelem_C_S
+
+ // ARMv8.3 - JSCVT
+ COMPARE_PREFIX(dci(0x1e7e0000), "fjcvtzs"); // FJCVTZS_32D_float2int
+
+ // ARMv8.3 - LRCPC
+ COMPARE_PREFIX(dci(0x38a0c000), "ldaprb"); // LDAPRB_32L_memop
+ COMPARE_PREFIX(dci(0x78a0c000), "ldaprh"); // LDAPRH_32L_memop
+ COMPARE_PREFIX(dci(0xb8a0c000), "ldapr"); // LDAPR_32L_memop
+ COMPARE_PREFIX(dci(0xf8a0c000), "ldapr"); // LDAPR_64L_memop
+
+ // ARMv8.3 - PAuth
+ COMPARE_PREFIX(dci(0x9ac03000), "pacga"); // PACGA_64P_dp_2src
+ COMPARE_PREFIX(dci(0xd50320ff), "xpaclri"); // XPACLRI_HI_hints
+ COMPARE_PREFIX(dci(0xd503211f), "pacia1716"); // PACIA1716_HI_hints
+ COMPARE_PREFIX(dci(0xd503215f), "pacib1716"); // PACIB1716_HI_hints
+ COMPARE_PREFIX(dci(0xd503219f), "autia1716"); // AUTIA1716_HI_hints
+ COMPARE_PREFIX(dci(0xd50321df), "autib1716"); // AUTIB1716_HI_hints
+ COMPARE_PREFIX(dci(0xd503231f), "paciaz"); // PACIAZ_HI_hints
+ COMPARE_PREFIX(dci(0xd503233f), "paciasp"); // PACIASP_HI_hints
+ COMPARE_PREFIX(dci(0xd503235f), "pacibz"); // PACIBZ_HI_hints
+ COMPARE_PREFIX(dci(0xd503237f), "pacibsp"); // PACIBSP_HI_hints
+ COMPARE_PREFIX(dci(0xd503239f), "autiaz"); // AUTIAZ_HI_hints
+ COMPARE_PREFIX(dci(0xd50323bf), "autiasp"); // AUTIASP_HI_hints
+ COMPARE_PREFIX(dci(0xd50323df), "autibz"); // AUTIBZ_HI_hints
+ COMPARE_PREFIX(dci(0xd50323ff), "autibsp"); // AUTIBSP_HI_hints
+ COMPARE_PREFIX(dci(0xd61f081f), "braaz"); // BRAAZ_64_branch_reg
+ COMPARE_PREFIX(dci(0xd61f0c1f), "brabz"); // BRABZ_64_branch_reg
+ COMPARE_PREFIX(dci(0xd63f081f), "blraaz"); // BLRAAZ_64_branch_reg
+ COMPARE_PREFIX(dci(0xd63f0c1f), "blrabz"); // BLRABZ_64_branch_reg
+ COMPARE_PREFIX(dci(0xd65f0bff), "retaa"); // RETAA_64E_branch_reg
+ COMPARE_PREFIX(dci(0xd65f0fff), "retab"); // RETAB_64E_branch_reg
+ // COMPARE_PREFIX(dci(0xd69f0bff), "eretaa"); // ERETAA_64E_branch_reg
+ // COMPARE_PREFIX(dci(0xd69f0fff), "eretab"); // ERETAB_64E_branch_reg
+ COMPARE_PREFIX(dci(0xd71f0800), "braa"); // BRAA_64P_branch_reg
+ COMPARE_PREFIX(dci(0xd71f0c00), "brab"); // BRAB_64P_branch_reg
+ COMPARE_PREFIX(dci(0xd73f0800), "blraa"); // BLRAA_64P_branch_reg
+ COMPARE_PREFIX(dci(0xd73f0c00), "blrab"); // BLRAB_64P_branch_reg
+ COMPARE_PREFIX(dci(0xdac10000), "pacia"); // PACIA_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac10400), "pacib"); // PACIB_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac10800), "pacda"); // PACDA_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac10c00), "pacdb"); // PACDB_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac11000), "autia"); // AUTIA_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac11400), "autib"); // AUTIB_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac11800), "autda"); // AUTDA_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac11c00), "autdb"); // AUTDB_64P_dp_1src
+ COMPARE_PREFIX(dci(0xdac123e0), "paciza"); // PACIZA_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac127e0), "pacizb"); // PACIZB_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac12be0), "pacdza"); // PACDZA_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac12fe0), "pacdzb"); // PACDZB_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac133e0), "autiza"); // AUTIZA_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac137e0), "autizb"); // AUTIZB_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac13be0), "autdza"); // AUTDZA_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac13fe0), "autdzb"); // AUTDZB_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac143e0), "xpaci"); // XPACI_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xdac147e0), "xpacd"); // XPACD_64Z_dp_1src
+ COMPARE_PREFIX(dci(0xf8200400), "ldraa"); // LDRAA_64_ldst_pac
+ COMPARE_PREFIX(dci(0xf8200c00), "ldraa"); // LDRAA_64W_ldst_pac
+ COMPARE_PREFIX(dci(0xf8a00400), "ldrab"); // LDRAB_64_ldst_pac
+ COMPARE_PREFIX(dci(0xf8a00c00), "ldrab"); // LDRAB_64W_ldst_pac
+
+ // ARMv8.4 - FlagM
+ COMPARE_PREFIX(dci(0x3a00080d), "setf8"); // SETF8_only_setf
+ COMPARE_PREFIX(dci(0x3a00480d), "setf16"); // SETF16_only_setf
+ COMPARE_PREFIX(dci(0xba000400), "rmif"); // RMIF_only_rmif
+ COMPARE_PREFIX(dci(0xd500401f), "cfinv"); // CFINV_M_pstate
+
+ // ARMv8.4 - LRCPC2
+ COMPARE_PREFIX(dci(0x19000000), "stlurb"); // STLURB_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x19400000), "ldapurb"); // LDAPURB_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x19800000), "ldapursb"); // LDAPURSB_64_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x19c00000), "ldapursb"); // LDAPURSB_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x59000000), "stlurh"); // STLURH_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x59400000), "ldapurh"); // LDAPURH_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x59800000), "ldapursh"); // LDAPURSH_64_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x59c00000), "ldapursh"); // LDAPURSH_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x99000000), "stlur"); // STLUR_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x99400000), "ldapur"); // LDAPUR_32_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0x99800000), "ldapursw"); // LDAPURSW_64_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0xd9000000), "stlur"); // STLUR_64_ldapstl_unscaled
+ COMPARE_PREFIX(dci(0xd9400000), "ldapur"); // LDAPUR_64_ldapstl_unscaled
+
+ // ARMv8.4 - TRF
+ // COMPARE_PREFIX(dci(0xd503225f), "tsb"); // TSB_HC_hints
+
+ // ARMv8.5 - BTI
+ COMPARE_PREFIX(dci(0xd503241f), "bti"); // BTI_HB_hints
+
+ // ARMv8.5 - FRINTTS
+ COMPARE_PREFIX(dci(0x0e21e800), "frint32z"); // FRINT32Z_asimdmisc_R
+ COMPARE_PREFIX(dci(0x0e21f800), "frint64z"); // FRINT64Z_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2e21e800), "frint32x"); // FRINT32X_asimdmisc_R
+ COMPARE_PREFIX(dci(0x2e21f800), "frint64x"); // FRINT64X_asimdmisc_R
+ COMPARE_PREFIX(dci(0x1e284000), "frint32z"); // FRINT32Z_S_floatdp1
+ COMPARE_PREFIX(dci(0x1e28c000), "frint32x"); // FRINT32X_S_floatdp1
+ COMPARE_PREFIX(dci(0x1e294000), "frint64z"); // FRINT64Z_S_floatdp1
+ COMPARE_PREFIX(dci(0x1e29c000), "frint64x"); // FRINT64X_S_floatdp1
+ COMPARE_PREFIX(dci(0x1e684000), "frint32z"); // FRINT32Z_D_floatdp1
+ COMPARE_PREFIX(dci(0x1e68c000), "frint32x"); // FRINT32X_D_floatdp1
+ COMPARE_PREFIX(dci(0x1e694000), "frint64z"); // FRINT64Z_D_floatdp1
+ COMPARE_PREFIX(dci(0x1e69c000), "frint64x"); // FRINT64X_D_floatdp1
+
+ // ARMv8.5 - FlagM2
+ COMPARE_PREFIX(dci(0xd500403f), "xaflag"); // XAFLAG_M_pstate
+ COMPARE_PREFIX(dci(0xd500405f), "axflag"); // AXFLAG_M_pstate
+
+ // ARMv8.5 - MTE
+ // COMPARE_PREFIX(dci(0x68800000), "stgp"); // STGP_64_ldstpair_post
+ // COMPARE_PREFIX(dci(0x69000000), "stgp"); // STGP_64_ldstpair_off
+ // COMPARE_PREFIX(dci(0x69800000), "stgp"); // STGP_64_ldstpair_pre
+ // COMPARE_PREFIX(dci(0x91800000), "addg"); // ADDG_64_addsub_immtags
+ // COMPARE_PREFIX(dci(0x9ac00000), "subp"); // SUBP_64S_dp_2src
+ // COMPARE_PREFIX(dci(0x9ac01000), "irg"); // IRG_64I_dp_2src
+ // COMPARE_PREFIX(dci(0x9ac01400), "gmi"); // GMI_64G_dp_2src
+ // COMPARE_PREFIX(dci(0xbac00000), "subps"); // SUBPS_64S_dp_2src
+ // COMPARE_PREFIX(dci(0xd1800000), "subg"); // SUBG_64_addsub_immtags
+ // COMPARE_PREFIX(dci(0xd9200400), "stg"); // STG_64Spost_ldsttags
+ // COMPARE_PREFIX(dci(0xd9200800), "stg"); // STG_64Soffset_ldsttags
+ // COMPARE_PREFIX(dci(0xd9200c00), "stg"); // STG_64Spre_ldsttags
+ // COMPARE_PREFIX(dci(0xd9600000), "ldg"); // LDG_64Loffset_ldsttags
+ // COMPARE_PREFIX(dci(0xd9600400), "stzg"); // STZG_64Spost_ldsttags
+ // COMPARE_PREFIX(dci(0xd9600800), "stzg"); // STZG_64Soffset_ldsttags
+ // COMPARE_PREFIX(dci(0xd9600c00), "stzg"); // STZG_64Spre_ldsttags
+ // COMPARE_PREFIX(dci(0xd9a00400), "st2g"); // ST2G_64Spost_ldsttags
+ // COMPARE_PREFIX(dci(0xd9a00800), "st2g"); // ST2G_64Soffset_ldsttags
+ // COMPARE_PREFIX(dci(0xd9a00c00), "st2g"); // ST2G_64Spre_ldsttags
+ // COMPARE_PREFIX(dci(0xd9e00400), "stz2g"); // STZ2G_64Spost_ldsttags
+ // COMPARE_PREFIX(dci(0xd9e00800), "stz2g"); // STZ2G_64Soffset_ldsttags
+ // COMPARE_PREFIX(dci(0xd9e00c00), "stz2g"); // STZ2G_64Spre_ldsttags
+
+ // ARMv8.5 - MTE2
+ // COMPARE_PREFIX(dci(0xd9200000), "stzgm"); // STZGM_64bulk_ldsttags
+ // COMPARE_PREFIX(dci(0xd9a00000), "stgm"); // STGM_64bulk_ldsttags
+ // COMPARE_PREFIX(dci(0xd9e00000), "ldgm"); // LDGM_64bulk_ldsttags
+
+ // ARMv8.6 - BF16
+ // COMPARE_PREFIX(dci(0x0ea16800), "bfcvtn"); // BFCVTN_asimdmisc_4S
+ // COMPARE_PREFIX(dci(0x0f40f000), "bfdot"); // BFDOT_asimdelem_E
+ // COMPARE_PREFIX(dci(0x0fc0f000), "bfmlal"); // BFMLAL_asimdelem_F
+ // COMPARE_PREFIX(dci(0x2e40fc00), "bfdot"); // BFDOT_asimdsame2_D
+ // COMPARE_PREFIX(dci(0x2ec0fc00), "bfmlal"); // BFMLAL_asimdsame2_F_
+ // COMPARE_PREFIX(dci(0x1e634000), "bfcvt"); // BFCVT_BS_floatdp1
+ // COMPARE_PREFIX(dci(0x6e40ec00), "bfmmla"); // BFMMLA_asimdsame2_E
+
+ // ARMv8.6 - DGH
+ // COMPARE_PREFIX(dci(0xd50320df), "dgh"); // DGH_HI_hints
+
+ // ARMv8.6 - I8MM
+ COMPARE_PREFIX(dci(0x0e809c00), "usdot"); // USDOT_asimdsame2_D
+ COMPARE_PREFIX(dci(0x0f00f000), "sudot"); // SUDOT_asimdelem_D
+ COMPARE_PREFIX(dci(0x0f80f000), "usdot"); // USDOT_asimdelem_D
+ COMPARE_PREFIX(dci(0x4e80a400), "smmla"); // SMMLA_asimdsame2_G
+ COMPARE_PREFIX(dci(0x4e80ac00), "usmmla"); // USMMLA_asimdsame2_G
+ COMPARE_PREFIX(dci(0x6e80a400), "ummla"); // UMMLA_asimdsame2_G
+
+ // ARMv8.7 - LS64
+ // COMPARE_PREFIX(dci(0xf83f9000), "st64b"); // ST64B_64L_memop
+ // COMPARE_PREFIX(dci(0xf83fd000), "ld64b"); // LD64B_64L_memop
+
+ // ARMv8.7 - LS64_V
+ // COMPARE_PREFIX(dci(0xf820a000), "st64bv0"); // ST64BV0_64_memop
+ // COMPARE_PREFIX(dci(0xf820b000), "st64bv"); // ST64BV_64_memop
+
+ // ARMv8.7 - WFxT
+ // COMPARE_PREFIX(dci(0xd5031000), "wfet"); // WFET_only_systeminstrswithreg
+ // COMPARE_PREFIX(dci(0xd5031020), "wfit"); // WFIT_only_systeminstrswithreg
+
+ // TME
+ // COMPARE_PREFIX(dci(0xd4600000), "tcancel"); // TCANCEL_EX_exception
+ // COMPARE_PREFIX(dci(0xd503307f), "tcommit"); // TCOMMIT_only_barriers
+ // COMPARE_PREFIX(dci(0xd5233060), "tstart"); // TSTART_BR_systemresult
+ // COMPARE_PREFIX(dci(0xd5233160), "ttest"); // TTEST_BR_systemresult
+
+ CLEANUP();
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-aarch64.h b/test/aarch64/test-disasm-aarch64.h
index 9e16bc3d..5c65e8df 100644
--- a/test/aarch64/test-disasm-aarch64.h
+++ b/test/aarch64/test-disasm-aarch64.h
@@ -54,9 +54,10 @@
do { \
printf("----\n"); \
PrintDisassembler print_disasm(stdout); \
- Instruction* start = masm.GetBuffer()->GetStartAddress<Instruction*>(); \
- Instruction* end = masm.GetBuffer()->GetEndAddress<Instruction*>(); \
- print_disasm.DisassembleBuffer(start, end); \
+ Instruction* dis_start = \
+ masm.GetBuffer()->GetStartAddress<Instruction*>(); \
+ Instruction* dis_end = masm.GetBuffer()->GetEndAddress<Instruction*>(); \
+ print_disasm.DisassembleBuffer(dis_start, dis_end); \
} while (0)
#define COMPARE(ASM, EXP) \
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index a8e91e95..17957f5e 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -1745,6 +1745,11 @@ TEST(neon_3same) {
COMPARE_MACRO(Udot(v1.V4S(), v2.V16B(), v3.V16B()),
"udot v1.4s, v2.16b, v3.16b");
+ COMPARE_MACRO(Usdot(v7.V2S(), v9.V8B(), v30.V8B()),
+ "usdot v7.2s, v9.8b, v30.8b");
+ COMPARE_MACRO(Usdot(v7.V4S(), v9.V16B(), v30.V16B()),
+ "usdot v7.4s, v9.16b, v30.16b");
+
COMPARE_MACRO(And(v6.V8B(), v7.V8B(), v8.V8B()), "and v6.8b, v7.8b, v8.8b");
COMPARE_MACRO(And(v6.V16B(), v7.V16B(), v8.V16B()),
"and v6.16b, v7.16b, v8.16b");
@@ -2425,6 +2430,15 @@ TEST(neon_byelement) {
COMPARE_MACRO(Fmlsl2(v28.V4S(), v28.V4H(), v7.H(), 0),
"fmlsl2 v28.4s, v28.4h, v7.h[0]");
+ COMPARE_MACRO(Sudot(v10.V2S(), v21.V8B(), v31.S4B(), 0),
+ "sudot v10.2s, v21.8b, v31.4b[0]");
+ COMPARE_MACRO(Sudot(v12.V4S(), v23.V16B(), v16.S4B(), 3),
+ "sudot v12.4s, v23.16b, v16.4b[3]");
+ COMPARE_MACRO(Usdot(v10.V2S(), v21.V8B(), v31.S4B(), 0),
+ "usdot v10.2s, v21.8b, v31.4b[0]");
+ COMPARE_MACRO(Usdot(v12.V4S(), v23.V16B(), v16.S4B(), 3),
+ "usdot v12.4s, v23.16b, v16.4b[3]");
+
CLEANUP();
}
@@ -2434,56 +2448,64 @@ TEST(neon_fp_byelement) {
COMPARE_MACRO(Fmul(v0.V4H(), v1.V4H(), v2.H(), 0),
"fmul v0.4h, v1.4h, v2.h[0]");
- COMPARE_MACRO(Fmul(v2.V8H(), v3.V8H(), v15.H(), 3),
- "fmul v2.8h, v3.8h, v15.h[3]");
+ COMPARE_MACRO(Fmul(v2.V8H(), v3.V8H(), v15.H(), 7),
+ "fmul v2.8h, v3.8h, v15.h[7]");
COMPARE_MACRO(Fmul(v0.V2S(), v1.V2S(), v2.S(), 0),
"fmul v0.2s, v1.2s, v2.s[0]");
COMPARE_MACRO(Fmul(v2.V4S(), v3.V4S(), v15.S(), 3),
"fmul v2.4s, v3.4s, v15.s[3]");
COMPARE_MACRO(Fmul(v0.V2D(), v1.V2D(), v2.D(), 0),
"fmul v0.2d, v1.2d, v2.d[0]");
+ COMPARE_MACRO(Fmul(v0.V2D(), v1.V2D(), v2.D(), 1),
+ "fmul v0.2d, v1.2d, v2.d[1]");
COMPARE_MACRO(Fmul(d0, d1, v2.D(), 0), "fmul d0, d1, v2.d[0]");
COMPARE_MACRO(Fmul(s0, s1, v2.S(), 0), "fmul s0, s1, v2.s[0]");
COMPARE_MACRO(Fmul(h0, h1, v2.H(), 0), "fmul h0, h1, v2.h[0]");
COMPARE_MACRO(Fmla(v0.V4H(), v1.V4H(), v2.H(), 0),
"fmla v0.4h, v1.4h, v2.h[0]");
- COMPARE_MACRO(Fmla(v2.V8H(), v3.V8H(), v15.H(), 3),
- "fmla v2.8h, v3.8h, v15.h[3]");
+ COMPARE_MACRO(Fmla(v2.V8H(), v3.V8H(), v15.H(), 7),
+ "fmla v2.8h, v3.8h, v15.h[7]");
COMPARE_MACRO(Fmla(v0.V2S(), v1.V2S(), v2.S(), 0),
"fmla v0.2s, v1.2s, v2.s[0]");
COMPARE_MACRO(Fmla(v2.V4S(), v3.V4S(), v15.S(), 3),
"fmla v2.4s, v3.4s, v15.s[3]");
COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v2.D(), 0),
"fmla v0.2d, v1.2d, v2.d[0]");
+ COMPARE_MACRO(Fmla(v0.V2D(), v1.V2D(), v2.D(), 1),
+ "fmla v0.2d, v1.2d, v2.d[1]");
COMPARE_MACRO(Fmla(d0, d1, v2.D(), 0), "fmla d0, d1, v2.d[0]");
COMPARE_MACRO(Fmla(s0, s1, v2.S(), 0), "fmla s0, s1, v2.s[0]");
COMPARE_MACRO(Fmla(h0, h1, v2.H(), 0), "fmla h0, h1, v2.h[0]");
COMPARE_MACRO(Fmls(v0.V4H(), v1.V4H(), v2.H(), 0),
"fmls v0.4h, v1.4h, v2.h[0]");
- COMPARE_MACRO(Fmls(v2.V8H(), v3.V8H(), v15.H(), 3),
- "fmls v2.8h, v3.8h, v15.h[3]");
+ COMPARE_MACRO(Fmls(v2.V8H(), v3.V8H(), v15.H(), 7),
+ "fmls v2.8h, v3.8h, v15.h[7]");
COMPARE_MACRO(Fmls(v0.V2S(), v1.V2S(), v2.S(), 0),
"fmls v0.2s, v1.2s, v2.s[0]");
COMPARE_MACRO(Fmls(v2.V4S(), v3.V4S(), v15.S(), 3),
"fmls v2.4s, v3.4s, v15.s[3]");
COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v2.D(), 0),
"fmls v0.2d, v1.2d, v2.d[0]");
+ COMPARE_MACRO(Fmls(v0.V2D(), v1.V2D(), v2.D(), 1),
+ "fmls v0.2d, v1.2d, v2.d[1]");
COMPARE_MACRO(Fmls(d0, d1, v2.D(), 0), "fmls d0, d1, v2.d[0]");
COMPARE_MACRO(Fmls(s0, s1, v2.S(), 0), "fmls s0, s1, v2.s[0]");
COMPARE_MACRO(Fmls(h0, h1, v2.H(), 0), "fmls h0, h1, v2.h[0]");
COMPARE_MACRO(Fmulx(v0.V4H(), v1.V4H(), v2.H(), 0),
"fmulx v0.4h, v1.4h, v2.h[0]");
- COMPARE_MACRO(Fmulx(v2.V8H(), v3.V8H(), v15.H(), 3),
- "fmulx v2.8h, v3.8h, v15.h[3]");
+ COMPARE_MACRO(Fmulx(v2.V8H(), v3.V8H(), v15.H(), 7),
+ "fmulx v2.8h, v3.8h, v15.h[7]");
COMPARE_MACRO(Fmulx(v0.V2S(), v1.V2S(), v2.S(), 0),
"fmulx v0.2s, v1.2s, v2.s[0]");
COMPARE_MACRO(Fmulx(v2.V4S(), v3.V4S(), v8.S(), 3),
"fmulx v2.4s, v3.4s, v8.s[3]");
COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v2.D(), 0),
"fmulx v0.2d, v1.2d, v2.d[0]");
+ COMPARE_MACRO(Fmulx(v0.V2D(), v1.V2D(), v2.D(), 1),
+ "fmulx v0.2d, v1.2d, v2.d[1]");
COMPARE_MACRO(Fmulx(d0, d1, v2.D(), 0), "fmulx d0, d1, v2.d[0]");
COMPARE_MACRO(Fmulx(s0, s1, v2.S(), 0), "fmulx s0, s1, v2.s[0]");
COMPARE_MACRO(Fmulx(h0, h1, v2.H(), 0), "fmulx h0, h1, v2.h[0]");
@@ -3099,6 +3121,13 @@ TEST(neon_modimm) {
COMPARE_MACRO(Movi(v1.V2D(), 0xffff0000ffffff),
"movi v1.2d, #0xffff0000ffffff");
+ COMPARE_MACRO(Movi(v2.V2D(), 0xff00ff00ff00ff, 0xff00ff00ff00ff),
+ "movi v2.2d, #0xff00ff00ff00ff");
+ COMPARE_MACRO(Movi(v3.V2D(), 0xffff, 0xff00ff00ff00ff),
+ "movi v3.2d, #0xff00ff00ff00ff\n"
+ "mov x16, #0xffff\n"
+ "mov v3.d[1], x16");
+
COMPARE_MACRO(Fmov(v0.V2S(), 1.0f), "fmov v0.2s, #0x70 (1.0000)");
COMPARE_MACRO(Fmov(v31.V2S(), -13.0f), "fmov v31.2s, #0xaa (-13.0000)");
COMPARE_MACRO(Fmov(v0.V4S(), 1.0f), "fmov v0.4s, #0x70 (1.0000)");
@@ -3135,9 +3164,9 @@ TEST(neon_2regmisc) {
COMPARE_MACRO(Shll2(v6.V2D(), v4.V4S(), 32), "shll2 v6.2d, v4.4s, #32");
// An unallocated form of shll.
- COMPARE(dci(0x2ee13bff), "unallocated (NEON2RegMisc)");
+ COMPARE(dci(0x2ee13bff), "unallocated (Unallocated)");
// An unallocated form of shll2.
- COMPARE(dci(0x6ee13bff), "unallocated (NEON2RegMisc)");
+ COMPARE(dci(0x6ee13bff), "unallocated (Unallocated)");
#define DISASM_INST(M, S) \
COMPARE_MACRO(Cmeq(v0.M, v1.M, 0), "cmeq v0." S ", v1." S ", #0");
@@ -4350,5 +4379,299 @@ TEST(neon_shift_immediate) {
CLEANUP();
}
+TEST(neon_matmul) {
+ SETUP();
+
+ COMPARE_MACRO(Smmla(v0.V4S(), v1.V16B(), v2.V16B()),
+ "smmla v0.4s, v1.16b, v2.16b");
+ COMPARE_MACRO(Ummla(v20.V4S(), v30.V16B(), v31.V16B()),
+ "ummla v20.4s, v30.16b, v31.16b");
+ COMPARE_MACRO(Usmmla(v3.V4S(), v29.V16B(), v13.V16B()),
+ "usmmla v3.4s, v29.16b, v13.16b");
+
+ CLEANUP();
+}
+
+TEST(neon_unallocated_regression_test) {
+ SETUP();
+
+ COMPARE_PREFIX(dci(0x5e20b985), "unallocated"); // abs b, b
+ COMPARE_PREFIX(dci(0x5e60b8e3), "unallocated"); // abs h, h
+ COMPARE_PREFIX(dci(0x5ea0b8d1), "unallocated"); // abs s, s
+ COMPARE_PREFIX(dci(0x5e318764), "unallocated"); // add b, b, b
+ COMPARE_PREFIX(dci(0x5e7f877a), "unallocated"); // add h, h, h
+ COMPARE_PREFIX(dci(0x5eb8842b), "unallocated"); // add s, s, s
+ COMPARE_PREFIX(dci(0x5eb1bbb7), "unallocated"); // addp s, v.s
+ COMPARE_PREFIX(dci(0x4ef1b90c), "unallocated"); // addv d, v.d
+ COMPARE_PREFIX(dci(0x0ef1babc), "unallocated"); // addv d, v.und
+ COMPARE_PREFIX(dci(0x4ee04a0f), "unallocated"); // cls v.d, v.d
+ COMPARE_PREFIX(dci(0x6ee048ef), "unallocated"); // clz v.d, v.d
+ COMPARE_PREFIX(dci(0x5e2099b4), "unallocated"); // cmeq b, b, #
+ COMPARE_PREFIX(dci(0x7e3a8c75), "unallocated"); // cmeq b, b, b
+ COMPARE_PREFIX(dci(0x5e6099a2), "unallocated"); // cmeq h, h, #
+ COMPARE_PREFIX(dci(0x7e7e8ce2), "unallocated"); // cmeq h, h, h
+ COMPARE_PREFIX(dci(0x5ea09a20), "unallocated"); // cmeq s, s, #
+ COMPARE_PREFIX(dci(0x7ea98fbd), "unallocated"); // cmeq s, s, s
+ COMPARE_PREFIX(dci(0x7e208ad0), "unallocated"); // cmge b, b, #
+ COMPARE_PREFIX(dci(0x5e233f3f), "unallocated"); // cmge b, b, b
+ COMPARE_PREFIX(dci(0x7e608b4e), "unallocated"); // cmge h, h, #
+ COMPARE_PREFIX(dci(0x5e643f87), "unallocated"); // cmge h, h, h
+ COMPARE_PREFIX(dci(0x7ea08b3c), "unallocated"); // cmge s, s, #
+ COMPARE_PREFIX(dci(0x5ea63e20), "unallocated"); // cmge s, s, s
+ COMPARE_PREFIX(dci(0x5e208837), "unallocated"); // cmgt b, b, #
+ COMPARE_PREFIX(dci(0x5e2f3591), "unallocated"); // cmgt b, b, b
+ COMPARE_PREFIX(dci(0x5e608bde), "unallocated"); // cmgt h, h, #
+ COMPARE_PREFIX(dci(0x5e7f377b), "unallocated"); // cmgt h, h, h
+ COMPARE_PREFIX(dci(0x5ea08813), "unallocated"); // cmgt s, s, #
+ COMPARE_PREFIX(dci(0x5ead3429), "unallocated"); // cmgt s, s, s
+ COMPARE_PREFIX(dci(0x7e23373d), "unallocated"); // cmhi b, b, b
+ COMPARE_PREFIX(dci(0x7e7937c1), "unallocated"); // cmhi h, h, h
+ COMPARE_PREFIX(dci(0x7ea6361e), "unallocated"); // cmhi s, s, s
+ COMPARE_PREFIX(dci(0x7e3c3e4a), "unallocated"); // cmhs b, b, b
+ COMPARE_PREFIX(dci(0x7e653cb8), "unallocated"); // cmhs h, h, h
+ COMPARE_PREFIX(dci(0x7eb03d39), "unallocated"); // cmhs s, s, s
+ COMPARE_PREFIX(dci(0x7e209894), "unallocated"); // cmle b, b, #
+ COMPARE_PREFIX(dci(0x7e609882), "unallocated"); // cmle h, h, #
+ COMPARE_PREFIX(dci(0x7ea09900), "unallocated"); // cmle s, s, #
+ COMPARE_PREFIX(dci(0x5e20a808), "unallocated"); // cmlt b, b, #
+ COMPARE_PREFIX(dci(0x5e60ab1f), "unallocated"); // cmlt h, h, #
+ COMPARE_PREFIX(dci(0x5ea0ab0d), "unallocated"); // cmlt s, s, #
+ COMPARE_PREFIX(dci(0x5e218cda), "unallocated"); // cmtst b, b, b
+ COMPARE_PREFIX(dci(0x5e718ec4), "unallocated"); // cmtst h, h, h
+ COMPARE_PREFIX(dci(0x5eb38ccd), "unallocated"); // cmtst s, s, s
+ COMPARE_PREFIX(dci(0x4ee05863), "unallocated"); // cnt v.d, v.d
+ COMPARE_PREFIX(dci(0x4e605887), "unallocated"); // cnt v.h, v.h
+ COMPARE_PREFIX(dci(0x4ea05875), "unallocated"); // cnt v.s, v.s
+ COMPARE_PREFIX(dci(0x0ee05a13), "unallocated"); // cnt v.und, v.und
+ COMPARE_PREFIX(dci(0x2e0f419d), "unallocated"); // ext v.b, v.b, v.b, #
+ COMPARE_PREFIX(dci(0x7e216950), "unallocated"); // fcvtxn h, s
+ COMPARE_PREFIX(dci(0x6e216950), "unallocated"); // fcvtxn v.h, v.s
+ COMPARE_PREFIX(dci(0x5f08fc37), "unallocated"); // fcvtzs b, b, #
+ COMPARE_PREFIX(dci(0x4f0cfcb6), "unallocated"); // fcvtzs v.b, v.b, #
+ COMPARE_PREFIX(dci(0x7f08fed0), "unallocated"); // fcvtzu b, b, #
+ COMPARE_PREFIX(dci(0x6f0dfc80), "unallocated"); // fcvtzu v.b, v.b, #
+ COMPARE_PREFIX(dci(0x6e70c813), "unallocated"); // fmaxnmv d, v.d
+ COMPARE_PREFIX(dci(0x2e70ca53), "unallocated"); // fmaxnmv d, v.und
+ COMPARE_PREFIX(dci(0x2e30ca65), "unallocated"); // fmaxnmv s, v.s
+ COMPARE_PREFIX(dci(0x6e70fbfa), "unallocated"); // fmaxv d, v.d
+ COMPARE_PREFIX(dci(0x2e70fa81), "unallocated"); // fmaxv d, v.und
+ COMPARE_PREFIX(dci(0x2e30fb23), "unallocated"); // fmaxv s, v.s
+ COMPARE_PREFIX(dci(0x6ef0c87f), "unallocated"); // fminnmv d, v.d
+ COMPARE_PREFIX(dci(0x2ef0ca2f), "unallocated"); // fminnmv d, v.und
+ COMPARE_PREFIX(dci(0x2eb0ca41), "unallocated"); // fminnmv s, v.s
+ COMPARE_PREFIX(dci(0x6ef0f8ad), "unallocated"); // fminv d, v.d
+ COMPARE_PREFIX(dci(0x2ef0faed), "unallocated"); // fminv d, v.und
+ COMPARE_PREFIX(dci(0x2eb0faff), "unallocated"); // fminv s, v.s
+ COMPARE_PREFIX(dci(0x0fc61a34), "unallocated"); // fmla v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x0fed5909), "unallocated"); // fmls v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x0fd09a0a), "unallocated"); // fmul v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x2fdf99fc), "unallocated"); // fmulx v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x6f310336), "unallocated"); // mla v.b, v.b, v.b[]
+ COMPARE_PREFIX(dci(0x4efd978f), "unallocated"); // mla v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6fe80bb3), "unallocated"); // mla v.d, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x2fda0aa2), "unallocated"); // mla v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x6f0f4035), "unallocated"); // mls v.b, v.b, v.b[]
+ COMPARE_PREFIX(dci(0x6eee95ed), "unallocated"); // mls v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6ffa43fa), "unallocated"); // mls v.d, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x2ffd4186), "unallocated"); // mls v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x4f2482ac), "unallocated"); // mul v.b, v.b, v.b[]
+ COMPARE_PREFIX(dci(0x4efc9d87), "unallocated"); // mul v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4fc58321), "unallocated"); // mul v.d, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0fef8b9b), "unallocated"); // mul v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x7e20b865), "unallocated"); // neg b, b
+ COMPARE_PREFIX(dci(0x7e60b853), "unallocated"); // neg h, h
+ COMPARE_PREFIX(dci(0x7ea0bbfa), "unallocated"); // neg s, s
+ COMPARE_PREFIX(dci(0x6eea9c50), "unallocated"); // pmul v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x2e789e4c), "unallocated"); // pmul v.h, v.h, v.h
+ COMPARE_PREFIX(dci(0x2ea39e8e), "unallocated"); // pmul v.s, v.s, v.s
+ COMPARE_PREFIX(dci(0x2efb9dbd), "unallocated"); // pmul v.und, v.und, v.und
+ COMPARE_PREFIX(dci(0x4eace101), "unallocated"); // pmull v.d, v.s, v.s
+ COMPARE_PREFIX(dci(0x0e6de3ad), "unallocated"); // pmull v.s, v.h, v.h
+ COMPARE_PREFIX(dci(0x4ee3e2c0), "unallocated"); // pmull v.und, v.d, v.d
+ COMPARE_PREFIX(dci(0x0eede060), "unallocated"); // pmull v.und, v.und, v.und
+ COMPARE_PREFIX(dci(0x6ee00afd), "unallocated"); // rev v.d, v.d
+ COMPARE_PREFIX(dci(0x4e601975), "unallocated"); // rev v.h, v.h
+ COMPARE_PREFIX(dci(0x4ea019f3), "unallocated"); // rev v.s, v.s
+ COMPARE_PREFIX(dci(0x2ee00984), "unallocated"); // rev v.und, v.und
+ COMPARE_PREFIX(dci(0x4ef07cc9), "unallocated"); // saba v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4ee57554), "unallocated"); // sabd v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x0eb03927), "unallocated"); // saddlv d, v.s
+ COMPARE_PREFIX(dci(0x5f0de4b1), "unallocated"); // scvtf b, b, #
+ COMPARE_PREFIX(dci(0x4f08e468), "unallocated"); // scvtf v.b, v.b, #
+ COMPARE_PREFIX(dci(0x4eed07a2), "unallocated"); // shadd v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x5f0b54ec), "unallocated"); // shl b, b, #
+ COMPARE_PREFIX(dci(0x5f1f56d7), "unallocated"); // shl h, h, #
+ COMPARE_PREFIX(dci(0x5f205498), "unallocated"); // shl s, s, #
+ COMPARE_PREFIX(dci(0x4ef7256c), "unallocated"); // shsub v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x7f095521), "unallocated"); // sli b, b, #
+ COMPARE_PREFIX(dci(0x7f1d579c), "unallocated"); // sli h, h, #
+ COMPARE_PREFIX(dci(0x7f21578b), "unallocated"); // sli s, s, #
+ COMPARE_PREFIX(dci(0x4eeb662a), "unallocated"); // smax v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4ef6a53f), "unallocated"); // smaxp v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4ef0aa5e), "unallocated"); // smaxv d, v.d
+ COMPARE_PREFIX(dci(0x0eb0ab90), "unallocated"); // smaxv s, v.s
+ COMPARE_PREFIX(dci(0x4eeb6d0c), "unallocated"); // smin v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4ee4ac0a), "unallocated"); // sminp v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4ef1aab8), "unallocated"); // sminv d, v.d
+ COMPARE_PREFIX(dci(0x0eb1a951), "unallocated"); // sminv s, v.s
+ COMPARE_PREFIX(dci(0x4fd32bd8), "unallocated"); // smlal v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0ff32a9e), "unallocated"); // smlal v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x4ffa6aad), "unallocated"); // smlsl v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0ff56af4), "unallocated"); // smlsl v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x0e182f6f), "unallocated"); // smov w, v.d[]
+ COMPARE_PREFIX(dci(0x0e042d84), "unallocated"); // smov w, v.s[]
+ COMPARE_PREFIX(dci(0x4e082c53), "unallocated"); // smov x, v.d[]
+ COMPARE_PREFIX(dci(0x4fcfa8ed), "unallocated"); // smull v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0fdba861), "unallocated"); // smull v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x5e2f93e4), "unallocated"); // sqdmlal h, b, b
+ COMPARE_PREFIX(dci(0x5f113b27), "unallocated"); // sqdmlal h, b, v.b[]
+ COMPARE_PREFIX(dci(0x5fff3b58), "unallocated"); // sqdmlal undd, d, v.d[]
+ COMPARE_PREFIX(dci(0x0e2491d8), "unallocated"); // sqdmlal v.h, v.b, v.b
+ COMPARE_PREFIX(dci(0x4fdb3b2a), "unallocated"); // sqdmlal v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0ffc3a4a),
+ "unallocated"); // sqdmlal v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x5e3eb3a7), "unallocated"); // sqdmlsl h, b, b
+ COMPARE_PREFIX(dci(0x5f337121), "unallocated"); // sqdmlsl h, b, v.b[]
+ COMPARE_PREFIX(dci(0x5fd378ae), "unallocated"); // sqdmlsl undd, d, v.d[]
+ COMPARE_PREFIX(dci(0x4e3eb3a7), "unallocated"); // sqdmlsl v.h, v.b, v.b
+ COMPARE_PREFIX(dci(0x4fda78c2), "unallocated"); // sqdmlsl v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0ff279b3),
+ "unallocated"); // sqdmlsl v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x5e34b7b7), "unallocated"); // sqdmulh b, b, b
+ COMPARE_PREFIX(dci(0x5f16c106), "unallocated"); // sqdmulh b, b, v.b[]
+ COMPARE_PREFIX(dci(0x5ef8b447), "unallocated"); // sqdmulh d, d, d
+ COMPARE_PREFIX(dci(0x5fc5c113), "unallocated"); // sqdmulh d, d, v.d[]
+ COMPARE_PREFIX(dci(0x4e33b6cd), "unallocated"); // sqdmulh v.b, v.b, v.b
+ COMPARE_PREFIX(dci(0x4f3bc21e), "unallocated"); // sqdmulh v.b, v.b, v.b[]
+ COMPARE_PREFIX(dci(0x4eefb738), "unallocated"); // sqdmulh v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4ff2cb2b), "unallocated"); // sqdmulh v.d, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0feacbdd),
+ "unallocated"); // sqdmulh v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x5e3ed2e8), "unallocated"); // sqdmull h, b, b
+ COMPARE_PREFIX(dci(0x5f23b842), "unallocated"); // sqdmull h, b, v.b[]
+ COMPARE_PREFIX(dci(0x5fc8ba56), "unallocated"); // sqdmull undd, d, v.d[]
+ COMPARE_PREFIX(dci(0x4e38d125), "unallocated"); // sqdmull v.h, v.b, v.b
+ COMPARE_PREFIX(dci(0x4ff5b8b3), "unallocated"); // sqdmull v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0fdcbac8),
+ "unallocated"); // sqdmull v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x7fcdd950), "unallocated"); // sqrdmlah d, d, v.d[]
+ COMPARE_PREFIX(dci(0x6fd6d80f), "unallocated"); // sqrdmlah v.d, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x2fecdae5),
+ "unallocated"); // sqrdmlah v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x7fe0f992), "unallocated"); // sqrdmlsh d, d, v.d[]
+ COMPARE_PREFIX(dci(0x6ff1f9df), "unallocated"); // sqrdmlsh v.d, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x2fcdfad1),
+ "unallocated"); // sqrdmlsh v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x7e23b7fa), "unallocated"); // sqrdmulh b, b, b
+ COMPARE_PREFIX(dci(0x5f1ad272), "unallocated"); // sqrdmulh b, b, v.b[]
+ COMPARE_PREFIX(dci(0x7ef8b6e0), "unallocated"); // sqrdmulh d, d, d
+ COMPARE_PREFIX(dci(0x5fd7d2a7), "unallocated"); // sqrdmulh d, d, v.d[]
+ COMPARE_PREFIX(dci(0x6e23b7fa), "unallocated"); // sqrdmulh v.b, v.b, v.b
+ COMPARE_PREFIX(dci(0x4f28d32a), "unallocated"); // sqrdmulh v.b, v.b, v.b[]
+ COMPARE_PREFIX(dci(0x6ef0b702), "unallocated"); // sqrdmulh v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x4feddb3f), "unallocated"); // sqrdmulh v.d, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x0fdddaf3),
+ "unallocated"); // sqrdmulh v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x5f679d27), "unallocated"); // sqrshrn d, d, #
+ COMPARE_PREFIX(dci(0x5f4d9f3b), "unallocated"); // sqrshrn d, h, #
+ COMPARE_PREFIX(dci(0x5f569dfa), "unallocated"); // sqrshrn d, s, #
+ COMPARE_PREFIX(dci(0x5f449d53), "unallocated"); // sqrshrn d, undn, #
+ COMPARE_PREFIX(dci(0x4f609da3), "unallocated"); // sqrshrn v.d, v.und, #
+ COMPARE_PREFIX(dci(0x7f698cee), "unallocated"); // sqrshrun d, d, #
+ COMPARE_PREFIX(dci(0x7f498fd8), "unallocated"); // sqrshrun d, h, #
+ COMPARE_PREFIX(dci(0x7f5d8e9a), "unallocated"); // sqrshrun d, s, #
+ COMPARE_PREFIX(dci(0x7f478e04), "unallocated"); // sqrshrun d, undn, #
+ COMPARE_PREFIX(dci(0x6f568c7d), "unallocated"); // sqrshrun v.d, v.und, #
+ COMPARE_PREFIX(dci(0x5f779488), "unallocated"); // sqshrn d, d, #
+ COMPARE_PREFIX(dci(0x5f4b9715), "unallocated"); // sqshrn d, h, #
+ COMPARE_PREFIX(dci(0x5f579449), "unallocated"); // sqshrn d, s, #
+ COMPARE_PREFIX(dci(0x5f4695ac), "unallocated"); // sqshrn d, undn, #
+ COMPARE_PREFIX(dci(0x4f6096c1), "unallocated"); // sqshrn v.d, v.und, #
+ COMPARE_PREFIX(dci(0x7f6786d1), "unallocated"); // sqshrun d, d, #
+ COMPARE_PREFIX(dci(0x7f4884e3), "unallocated"); // sqshrun d, h, #
+ COMPARE_PREFIX(dci(0x7f5886df), "unallocated"); // sqshrun d, s, #
+ COMPARE_PREFIX(dci(0x7f448464), "unallocated"); // sqshrun d, undn, #
+ COMPARE_PREFIX(dci(0x6f5b8674), "unallocated"); // sqshrun v.d, v.und, #
+ COMPARE_PREFIX(dci(0x4ef2163f), "unallocated"); // srhadd v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x7f09475d), "unallocated"); // sri b, b, #
+ COMPARE_PREFIX(dci(0x7f104771), "unallocated"); // sri h, h, #
+ COMPARE_PREFIX(dci(0x7f2045b4), "unallocated"); // sri s, s, #
+ COMPARE_PREFIX(dci(0x5e2656a3), "unallocated"); // srshl b, b, b
+ COMPARE_PREFIX(dci(0x5e605767), "unallocated"); // srshl h, h, h
+ COMPARE_PREFIX(dci(0x5eb654c2), "unallocated"); // srshl s, s, s
+ COMPARE_PREFIX(dci(0x5f0827c2), "unallocated"); // srshr b, b, #
+ COMPARE_PREFIX(dci(0x5f13249c), "unallocated"); // srshr h, h, #
+ COMPARE_PREFIX(dci(0x5f3526af), "unallocated"); // srshr s, s, #
+ COMPARE_PREFIX(dci(0x5f0e34b0), "unallocated"); // srsra b, b, #
+ COMPARE_PREFIX(dci(0x5f1537ed), "unallocated"); // srsra h, h, #
+ COMPARE_PREFIX(dci(0x5f3934f2), "unallocated"); // srsra s, s, #
+ COMPARE_PREFIX(dci(0x5e24470b), "unallocated"); // sshl b, b, b
+ COMPARE_PREFIX(dci(0x5e624525), "unallocated"); // sshl h, h, h
+ COMPARE_PREFIX(dci(0x5ea846d6), "unallocated"); // sshl s, s, s
+ COMPARE_PREFIX(dci(0x5f0a07bc), "unallocated"); // sshr b, b, #
+ COMPARE_PREFIX(dci(0x5f1d0504), "unallocated"); // sshr h, h, #
+ COMPARE_PREFIX(dci(0x5f3e059d), "unallocated"); // sshr s, s, #
+ COMPARE_PREFIX(dci(0x5f0d17ae), "unallocated"); // ssra b, b, #
+ COMPARE_PREFIX(dci(0x5f1417c2), "unallocated"); // ssra h, h, #
+ COMPARE_PREFIX(dci(0x5f2214c1), "unallocated"); // ssra s, s, #
+ COMPARE_PREFIX(dci(0x7e3a8503), "unallocated"); // sub b, b, b
+ COMPARE_PREFIX(dci(0x7e748657), "unallocated"); // sub h, h, h
+ COMPARE_PREFIX(dci(0x7eaf844c), "unallocated"); // sub s, s, s
+ COMPARE_PREFIX(dci(0x6efb7c3c), "unallocated"); // uaba v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6ee2749f), "unallocated"); // uabd v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x2eb03807), "unallocated"); // uaddlv d, v.s
+ COMPARE_PREFIX(dci(0x7f08e671), "unallocated"); // ucvtf b, b, #
+ COMPARE_PREFIX(dci(0x6f0ee59b), "unallocated"); // ucvtf v.b, v.b, #
+ COMPARE_PREFIX(dci(0x6eef052d), "unallocated"); // uhadd v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6eef2707), "unallocated"); // uhsub v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6ee6675a), "unallocated"); // umax v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6ef2a430), "unallocated"); // umaxp v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6ef0a8ae), "unallocated"); // umaxv d, v.d
+ COMPARE_PREFIX(dci(0x2eb0aa70), "unallocated"); // umaxv s, v.s
+ COMPARE_PREFIX(dci(0x6efd6d23), "unallocated"); // umin v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6ee2accf), "unallocated"); // uminp v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x6ef1aa28), "unallocated"); // uminv d, v.d
+ COMPARE_PREFIX(dci(0x2eb1a831), "unallocated"); // uminv s, v.s
+ COMPARE_PREFIX(dci(0x6ffa2b0b), "unallocated"); // umlal v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x2fdb2acd), "unallocated"); // umlal v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x6fe76bb5), "unallocated"); // umlsl v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x2ff068fb), "unallocated"); // umlsl v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x6fd0a947), "unallocated"); // umull v.und, v.d, v.d[]
+ COMPARE_PREFIX(dci(0x2fc0a8fb), "unallocated"); // umull v.und, v.und, v.d[]
+ COMPARE_PREFIX(dci(0x7f6e9c1b), "unallocated"); // uqrshrn d, d, #
+ COMPARE_PREFIX(dci(0x7f4d9e1b), "unallocated"); // uqrshrn d, h, #
+ COMPARE_PREFIX(dci(0x7f5e9d48), "unallocated"); // uqrshrn d, s, #
+ COMPARE_PREFIX(dci(0x7f419d2e), "unallocated"); // uqrshrn d, undn, #
+ COMPARE_PREFIX(dci(0x6f779e93), "unallocated"); // uqrshrn v.d, v.und, #
+ COMPARE_PREFIX(dci(0x7f649620), "unallocated"); // uqshrn d, d, #
+ COMPARE_PREFIX(dci(0x7f4a950b), "unallocated"); // uqshrn d, h, #
+ COMPARE_PREFIX(dci(0x7f55950e), "unallocated"); // uqshrn d, s, #
+ COMPARE_PREFIX(dci(0x7f4697b5), "unallocated"); // uqshrn d, undn, #
+ COMPARE_PREFIX(dci(0x6f749463), "unallocated"); // uqshrn v.d, v.und, #
+ COMPARE_PREFIX(dci(0x4ee1c88d), "unallocated"); // urecpe v.d, v.d
+ COMPARE_PREFIX(dci(0x6eed17ff), "unallocated"); // urhadd v.d, v.d, v.d
+ COMPARE_PREFIX(dci(0x7e30549c), "unallocated"); // urshl b, b, b
+ COMPARE_PREFIX(dci(0x7e6157c1), "unallocated"); // urshl h, h, h
+ COMPARE_PREFIX(dci(0x7eb65432), "unallocated"); // urshl s, s, s
+ COMPARE_PREFIX(dci(0x7f0b2637), "unallocated"); // urshr b, b, #
+ COMPARE_PREFIX(dci(0x7f13240c), "unallocated"); // urshr h, h, #
+ COMPARE_PREFIX(dci(0x7f232578), "unallocated"); // urshr s, s, #
+ COMPARE_PREFIX(dci(0x6ee1ca96), "unallocated"); // ursqrte v.d, v.d
+ COMPARE_PREFIX(dci(0x7f0a375a), "unallocated"); // ursra b, b, #
+ COMPARE_PREFIX(dci(0x7f12340f), "unallocated"); // ursra h, h, #
+ COMPARE_PREFIX(dci(0x7f2f3549), "unallocated"); // ursra s, s, #
+ COMPARE_PREFIX(dci(0x7e2d47d3), "unallocated"); // ushl b, b, b
+ COMPARE_PREFIX(dci(0x7e694742), "unallocated"); // ushl h, h, h
+ COMPARE_PREFIX(dci(0x7eab45db), "unallocated"); // ushl s, s, s
+ COMPARE_PREFIX(dci(0x7f0d0631), "unallocated"); // ushr b, b, #
+ COMPARE_PREFIX(dci(0x7f1805a4), "unallocated"); // ushr h, h, #
+ COMPARE_PREFIX(dci(0x7f2e063a), "unallocated"); // ushr s, s, #
+ COMPARE_PREFIX(dci(0x7f0c15a4), "unallocated"); // usra b, b, #
+ COMPARE_PREFIX(dci(0x7f1716c7), "unallocated"); // usra h, h, #
+ COMPARE_PREFIX(dci(0x7f261749), "unallocated"); // usra s, s, #
+
+ CLEANUP();
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-sve-aarch64.cc b/test/aarch64/test-disasm-sve-aarch64.cc
index 46128d91..933e808c 100644
--- a/test/aarch64/test-disasm-sve-aarch64.cc
+++ b/test/aarch64/test-disasm-sve-aarch64.cc
@@ -44,24 +44,81 @@ TEST(sve) {
SETUP();
// TODO: Replace these tests when the disassembler is more capable.
- COMPARE_PREFIX(asrr(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
- "asrr z0.b, p7/m, z0.b, z1.b");
- COMPARE_PREFIX(fcmeq(p6.VnD(), p7.Zeroing(), z0.VnD(), z1.VnD()),
- "fcmeq p6.d, p7/z, z0.d, z1.d");
- COMPARE_PREFIX(mla(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
- "mla z0.b, p7/m, z0.b, z1.b");
- COMPARE_PREFIX(mla(z1.VnS(), p7.Merging(), z1.VnS(), z0.VnS()),
- "mla z1.s, p7/m, z1.s, z0.s");
- COMPARE_PREFIX(asr(z1.VnB(), z0.VnB(), 8), "asr z1.b, z0.b, #8");
- COMPARE_PREFIX(lsl(z0.VnH(), z1.VnH(), 15), "lsl z0.h, z1.h, #15");
- COMPARE_PREFIX(lsr(z1.VnS(), z0.VnS(), 32), "lsr z1.s, z0.s, #32");
- COMPARE_PREFIX(and_(p6.VnB(), p7.Zeroing(), p6.VnB(), p7.VnB()),
- "and p6.b, p7/z, p6.b, p7.b");
- COMPARE_PREFIX(rev(p7.VnB(), p6.VnB()), "rev p7.b, p6.b");
- COMPARE_PREFIX(splice(z0.VnH(), p7, z0.VnH(), z1.VnH()),
- "splice z0.h, p7, z0.h, z1.h");
- COMPARE_PREFIX(fnmad(z0.VnD(), p6.Merging(), z1.VnD(), z0.VnD()),
- "fnmad z0.d, p6/m, z1.d, z0.d");
+ COMPARE(asrr(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
+ "asrr z0.b, p7/m, z0.b, z1.b");
+ COMPARE(fcmeq(p6.VnD(), p7.Zeroing(), z0.VnD(), z1.VnD()),
+ "fcmeq p6.d, p7/z, z0.d, z1.d");
+ COMPARE(mla(z0.VnB(), p7.Merging(), z0.VnB(), z1.VnB()),
+ "mla z0.b, p7/m, z0.b, z1.b");
+ COMPARE(mla(z1.VnS(), p7.Merging(), z1.VnS(), z0.VnS()),
+ "mla z1.s, p7/m, z1.s, z0.s");
+ COMPARE(asr(z1.VnB(), z0.VnB(), 8), "asr z1.b, z0.b, #8");
+ COMPARE(lsl(z0.VnH(), z1.VnH(), 15), "lsl z0.h, z1.h, #15");
+ COMPARE(lsr(z1.VnS(), z0.VnS(), 32), "lsr z1.s, z0.s, #32");
+ COMPARE(and_(p6.VnB(), p7.Zeroing(), p6.VnB(), p7.VnB()),
+ "and p6.b, p7/z, p6.b, p7.b");
+ COMPARE(rev(p7.VnB(), p6.VnB()), "rev p7.b, p6.b");
+ COMPARE(splice(z0.VnH(), p7, z0.VnH(), z1.VnH()),
+ "splice z0.h, p7, z0.h, z1.h");
+ COMPARE(fnmad(z0.VnD(), p6.Merging(), z1.VnD(), z0.VnD()),
+ "fnmad z0.d, p6/m, z1.d, z0.d");
+
+ CLEANUP();
+}
+
+TEST(sve_unallocated_fp_byte_type) {
+ // Ensure disassembly of FP instructions does not report byte-sized lanes.
+
+ SETUP();
+
+ COMPARE_PREFIX(dci(0x650003ca), "unallocated");
+ COMPARE_PREFIX(dci(0x6500230b), "unallocated");
+ COMPARE_PREFIX(dci(0x6500424c), "unallocated");
+ COMPARE_PREFIX(dci(0x6500618d), "unallocated");
+ COMPARE_PREFIX(dci(0x6500a00f), "unallocated");
+ COMPARE_PREFIX(dci(0x6500de91), "unallocated");
+ COMPARE_PREFIX(dci(0x6500fdd2), "unallocated");
+ COMPARE_PREFIX(dci(0x65011d13), "unallocated");
+ COMPARE_PREFIX(dci(0x65015b95), "unallocated");
+ COMPARE_PREFIX(dci(0x65017ad6), "unallocated");
+ COMPARE_PREFIX(dci(0x65019a17), "unallocated");
+ COMPARE_PREFIX(dci(0x6501b958), "unallocated");
+ COMPARE_PREFIX(dci(0x6502941f), "unallocated");
+ COMPARE_PREFIX(dci(0x6502b360), "unallocated");
+ COMPARE_PREFIX(dci(0x6502d2a1), "unallocated");
+ COMPARE_PREFIX(dci(0x65038e27), "unallocated");
+ COMPARE_PREFIX(dci(0x6503ad68), "unallocated");
+ COMPARE_PREFIX(dci(0x65042a6c), "unallocated");
+ COMPARE_PREFIX(dci(0x6504882f), "unallocated");
+ COMPARE_PREFIX(dci(0x6504a770), "unallocated");
+ COMPARE_PREFIX(dci(0x65052474), "unallocated");
+ COMPARE_PREFIX(dci(0x65058237), "unallocated");
+ COMPARE_PREFIX(dci(0x65063dbd), "unallocated");
+ COMPARE_PREFIX(dci(0x65069b80), "unallocated");
+ COMPARE_PREFIX(dci(0x6506bac1), "unallocated");
+ COMPARE_PREFIX(dci(0x65071884), "unallocated");
+ COMPARE_PREFIX(dci(0x650737c5), "unallocated");
+ COMPARE_PREFIX(dci(0x65079588), "unallocated");
+ COMPARE_PREFIX(dci(0x6507b4c9), "unallocated");
+ COMPARE_PREFIX(dci(0x65088f90), "unallocated");
+ COMPARE_PREFIX(dci(0x65090c94), "unallocated");
+ COMPARE_PREFIX(dci(0x65098998), "unallocated");
+ COMPARE_PREFIX(dci(0x650a83a0), "unallocated");
+ COMPARE_PREFIX(dci(0x650c96f1), "unallocated");
+ COMPARE_PREFIX(dci(0x650d90f9), "unallocated");
+ COMPARE_PREFIX(dci(0x65113a97), "unallocated");
+ COMPARE_PREFIX(dci(0x65183010), "unallocated");
+ COMPARE_PREFIX(dci(0x65200050), "unallocated");
+ COMPARE_PREFIX(dci(0x65203ed2), "unallocated");
+ COMPARE_PREFIX(dci(0x65205e13), "unallocated");
+ COMPARE_PREFIX(dci(0x65207d54), "unallocated");
+ COMPARE_PREFIX(dci(0x65209c95), "unallocated");
+ COMPARE_PREFIX(dci(0x6520bbd6), "unallocated");
+ COMPARE_PREFIX(dci(0x6520db17), "unallocated");
+ COMPARE_PREFIX(dci(0x6520fa58), "unallocated");
+ COMPARE_PREFIX(dci(0x650f31e1), "unallocated");
+ COMPARE_PREFIX(dci(0x650e30f7), "unallocated");
+ COMPARE_PREFIX(dci(0x6511376e), "unallocated");
CLEANUP();
}
@@ -69,49 +126,47 @@ TEST(sve) {
TEST(sve_address_generation) {
SETUP();
-#if 0
- COMPARE_PREFIX(adr(z19.VnD(), z22.VnD(), z11.VnD()), "adr <Zd>.D, [<Zn>.D, <Zm>.D, SXTW{<amount>}]");
- COMPARE_PREFIX(adr(z30.VnD(), z14.VnD(), z24.VnD()), "adr <Zd>.D, [<Zn>.D, <Zm>.D, UXTW{<amount>}]");
- COMPARE_PREFIX(adr(z8.Vn?(), z16.Vn?(), z16.Vn?()), "adr <Zd>.<T>, [<Zn>.<T>, <Zm>.<T>{, <mod> <amount>}]");
-#endif
- COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW)),
- "adr z19.d, [z22.d, z11.d, sxtw]");
- COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 1)),
- "adr z19.d, [z22.d, z11.d, sxtw #1]");
- COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 2)),
- "adr z19.d, [z22.d, z11.d, sxtw #2]");
- COMPARE_PREFIX(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 3)),
- "adr z19.d, [z22.d, z11.d, sxtw #3]");
- COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW)),
- "adr z30.d, [z14.d, z16.d, uxtw]");
- COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 1)),
- "adr z30.d, [z14.d, z16.d, uxtw #1]");
- COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 2)),
- "adr z30.d, [z14.d, z16.d, uxtw #2]");
- COMPARE_PREFIX(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 3)),
- "adr z30.d, [z14.d, z16.d, uxtw #3]");
- COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS())),
- "adr z8.s, [z16.s, z16.s]");
- COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 1)),
- "adr z8.s, [z16.s, z16.s, lsl #1]");
- COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 2)),
- "adr z8.s, [z16.s, z16.s, lsl #2]");
- COMPARE_PREFIX(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 3)),
- "adr z8.s, [z16.s, z16.s, lsl #3]");
- COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD())),
- "adr z9.d, [z1.d, z16.d]");
- COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 1)),
- "adr z9.d, [z1.d, z16.d, lsl #1]");
- COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 2)),
- "adr z9.d, [z1.d, z16.d, lsl #2]");
- COMPARE_PREFIX(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 3)),
- "adr z9.d, [z1.d, z16.d, lsl #3]");
+ COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW)),
+ "adr z19.d, [z22.d, z11.d, sxtw]");
+ COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 1)),
+ "adr z19.d, [z22.d, z11.d, sxtw #1]");
+ COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 2)),
+ "adr z19.d, [z22.d, z11.d, sxtw #2]");
+ COMPARE(adr(z19.VnD(), SVEMemOperand(z22.VnD(), z11.VnD(), SXTW, 3)),
+ "adr z19.d, [z22.d, z11.d, sxtw #3]");
+ COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW)),
+ "adr z30.d, [z14.d, z16.d, uxtw]");
+ COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 1)),
+ "adr z30.d, [z14.d, z16.d, uxtw #1]");
+ COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 2)),
+ "adr z30.d, [z14.d, z16.d, uxtw #2]");
+ COMPARE(adr(z30.VnD(), SVEMemOperand(z14.VnD(), z16.VnD(), UXTW, 3)),
+ "adr z30.d, [z14.d, z16.d, uxtw #3]");
+ COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS())),
+ "adr z8.s, [z16.s, z16.s]");
+ COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 1)),
+ "adr z8.s, [z16.s, z16.s, lsl #1]");
+ COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 2)),
+ "adr z8.s, [z16.s, z16.s, lsl #2]");
+ COMPARE(adr(z8.VnS(), SVEMemOperand(z16.VnS(), z16.VnS(), LSL, 3)),
+ "adr z8.s, [z16.s, z16.s, lsl #3]");
+ COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD())),
+ "adr z9.d, [z1.d, z16.d]");
+ COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 1)),
+ "adr z9.d, [z1.d, z16.d, lsl #1]");
+ COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 2)),
+ "adr z9.d, [z1.d, z16.d, lsl #2]");
+ COMPARE(adr(z9.VnD(), SVEMemOperand(z1.VnD(), z16.VnD(), LSL, 3)),
+ "adr z9.d, [z1.d, z16.d, lsl #3]");
CLEANUP();
}
TEST(sve_calculate_sve_address) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
// Shadow the `MacroAssembler` type so that the test macros work without
// modification.
typedef CalculateSVEAddressMacroAssembler MacroAssembler;
@@ -156,49 +211,47 @@ TEST(sve_calculate_sve_address) {
"add x22, sp, x3, lsl #2");
CLEANUP();
+
+#pragma GCC diagnostic pop
}
TEST(sve_bitwise_imm) {
SETUP();
// The assembler will necessarily encode an immediate in the simplest bitset.
- COMPARE_PREFIX(and_(z2.VnD(), z2.VnD(), 0x0000ffff0000ffff),
- "and z2.s, z2.s, #0xffff");
- COMPARE_PREFIX(dupm(z15.VnS(), 0x7f007f00), "dupm z15.h, #0x7f00");
- COMPARE_PREFIX(eor(z26.VnH(), z26.VnH(), 0x7ff8),
- "eor z26.h, z26.h, #0x7ff8");
- COMPARE_PREFIX(orr(z13.VnB(), z13.VnB(), 0x78), "orr z13.b, z13.b, #0x78");
+ COMPARE(and_(z2.VnD(), z2.VnD(), 0x0000ffff0000ffff),
+ "and z2.s, z2.s, #0xffff");
+ COMPARE(dupm(z15.VnS(), 0x7f007f00), "dupm z15.h, #0x7f00");
+ COMPARE(eor(z26.VnH(), z26.VnH(), 0x7ff8), "eor z26.h, z26.h, #0x7ff8");
+ COMPARE(orr(z13.VnB(), z13.VnB(), 0x78), "orr z13.b, z13.b, #0x78");
// Logical aliases.
- COMPARE_PREFIX(bic(z21.VnD(), z21.VnD(), 0xffff00000000ffff),
- "and z21.d, z21.d, #0xffffffff0000");
- COMPARE_PREFIX(eon(z31.VnS(), z31.VnS(), 0x1ffe),
- "eor z31.s, z31.s, #0xffffe001");
- COMPARE_PREFIX(orn(z11.VnH(), z11.VnH(), 0x2), "orr z11.h, z11.h, #0xfffd");
+ COMPARE(bic(z21.VnD(), z21.VnD(), 0xffff00000000ffff),
+ "and z21.d, z21.d, #0xffffffff0000");
+ COMPARE(eon(z31.VnS(), z31.VnS(), 0x1ffe), "eor z31.s, z31.s, #0xffffe001");
+ COMPARE(orn(z11.VnH(), z11.VnH(), 0x2), "orr z11.h, z11.h, #0xfffd");
// Mov alias for dupm.
- COMPARE_PREFIX(mov(z0.VnH(), 0xf00f), "mov z0.h, #0xf00f");
+ COMPARE(mov(z0.VnH(), 0xf00f), "mov z0.h, #0xf00f");
COMPARE_MACRO(Mov(z11.VnS(), 0xe0000003), "mov z11.s, #0xe0000003");
COMPARE_MACRO(Mov(z22.VnD(), 0x8000), "dupm z22.d, #0x8000");
// Test dupm versus mov disassembly.
- COMPARE_PREFIX(dupm(z0.VnH(), 0xfe), "dupm z0.h, #0xfe");
- COMPARE_PREFIX(dupm(z0.VnH(), 0xff), "dupm z0.h, #0xff");
- COMPARE_PREFIX(dupm(z0.VnH(), 0x1fe), "mov z0.h, #0x1fe");
- COMPARE_PREFIX(dupm(z0.VnH(), 0xfe00), "dupm z0.h, #0xfe00");
- COMPARE_PREFIX(dupm(z0.VnH(), 0xfe01), "mov z0.h, #0xfe01");
- COMPARE_PREFIX(dupm(z0.VnS(), 0xfe00), "dupm z0.s, #0xfe00");
- COMPARE_PREFIX(dupm(z0.VnS(), 0xfe000001), "mov z0.s, #0xfe000001");
- COMPARE_PREFIX(dupm(z0.VnS(), 0xffffff00), "dupm z0.s, #0xffffff00");
- COMPARE_PREFIX(dupm(z0.VnS(), 0xffffff01), "dupm z0.s, #0xffffff01");
- COMPARE_PREFIX(dupm(z0.VnS(), 0xfffffe01), "mov z0.s, #0xfffffe01");
- COMPARE_PREFIX(dupm(z0.VnS(), 0xfff), "mov z0.s, #0xfff");
- COMPARE_PREFIX(dupm(z0.VnD(), 0xffffffffffffff00),
- "dupm z0.d, #0xffffffffffffff00");
- COMPARE_PREFIX(dupm(z0.VnD(), 0x7fffffffffffff80),
- "mov z0.d, #0x7fffffffffffff80");
- COMPARE_PREFIX(dupm(z0.VnD(), 0x8000), "dupm z0.d, #0x8000");
- COMPARE_PREFIX(dupm(z0.VnD(), 0x10000), "mov z0.d, #0x10000");
+ COMPARE(dupm(z0.VnH(), 0xfe), "dupm z0.h, #0xfe");
+ COMPARE(dupm(z0.VnH(), 0xff), "dupm z0.h, #0xff");
+ COMPARE(dupm(z0.VnH(), 0x1fe), "mov z0.h, #0x1fe");
+ COMPARE(dupm(z0.VnH(), 0xfe00), "dupm z0.h, #0xfe00");
+ COMPARE(dupm(z0.VnH(), 0xfe01), "mov z0.h, #0xfe01");
+ COMPARE(dupm(z0.VnS(), 0xfe00), "dupm z0.s, #0xfe00");
+ COMPARE(dupm(z0.VnS(), 0xfe000001), "mov z0.s, #0xfe000001");
+ COMPARE(dupm(z0.VnS(), 0xffffff00), "dupm z0.s, #0xffffff00");
+ COMPARE(dupm(z0.VnS(), 0xffffff01), "dupm z0.s, #0xffffff01");
+ COMPARE(dupm(z0.VnS(), 0xfffffe01), "mov z0.s, #0xfffffe01");
+ COMPARE(dupm(z0.VnS(), 0xfff), "mov z0.s, #0xfff");
+ COMPARE(dupm(z0.VnD(), 0xffffffffffffff00), "dupm z0.d, #0xffffffffffffff00");
+ COMPARE(dupm(z0.VnD(), 0x7fffffffffffff80), "mov z0.d, #0x7fffffffffffff80");
+ COMPARE(dupm(z0.VnD(), 0x8000), "dupm z0.d, #0x8000");
+ COMPARE(dupm(z0.VnD(), 0x10000), "mov z0.d, #0x10000");
CLEANUP();
}
@@ -206,15 +259,14 @@ TEST(sve_bitwise_imm) {
TEST(sve_bitwise_logical_unpredicated) {
SETUP();
- COMPARE_PREFIX(and_(z12.VnD(), z5.VnD(), z29.VnD()),
- "and z12.d, z5.d, z29.d");
- COMPARE_PREFIX(bic(z11.VnD(), z15.VnD(), z9.VnD()), "bic z11.d, z15.d, z9.d");
- COMPARE_PREFIX(eor(z9.VnD(), z31.VnD(), z29.VnD()), "eor z9.d, z31.d, z29.d");
- COMPARE_PREFIX(orr(z17.VnD(), z8.VnD(), z19.VnD()), "orr z17.d, z8.d, z19.d");
+ COMPARE(and_(z12.VnD(), z5.VnD(), z29.VnD()), "and z12.d, z5.d, z29.d");
+ COMPARE(bic(z11.VnD(), z15.VnD(), z9.VnD()), "bic z11.d, z15.d, z9.d");
+ COMPARE(eor(z9.VnD(), z31.VnD(), z29.VnD()), "eor z9.d, z31.d, z29.d");
+ COMPARE(orr(z17.VnD(), z8.VnD(), z19.VnD()), "orr z17.d, z8.d, z19.d");
// Check mov aliases.
- COMPARE_PREFIX(orr(z17.VnD(), z8.VnD(), z8.VnD()), "mov z17.d, z8.d");
- COMPARE_PREFIX(mov(z18, z9), "mov z18.d, z9.d");
+ COMPARE(orr(z17.VnD(), z8.VnD(), z8.VnD()), "mov z17.d, z8.d");
+ COMPARE(mov(z18, z9), "mov z18.d, z9.d");
COMPARE_MACRO(Mov(z19, z10), "mov z19.d, z10.d");
CLEANUP();
@@ -223,73 +275,73 @@ TEST(sve_bitwise_logical_unpredicated) {
TEST(sve_bitwise_shift_predicated) {
SETUP();
- COMPARE_PREFIX(asrr(z20.VnB(), p3.Merging(), z20.VnB(), z11.VnB()),
- "asrr z20.b, p3/m, z20.b, z11.b");
- COMPARE_PREFIX(asrr(z20.VnH(), p3.Merging(), z20.VnH(), z11.VnH()),
- "asrr z20.h, p3/m, z20.h, z11.h");
- COMPARE_PREFIX(asrr(z20.VnS(), p3.Merging(), z20.VnS(), z11.VnS()),
- "asrr z20.s, p3/m, z20.s, z11.s");
- COMPARE_PREFIX(asrr(z20.VnD(), p3.Merging(), z20.VnD(), z11.VnD()),
- "asrr z20.d, p3/m, z20.d, z11.d");
- COMPARE_PREFIX(asr(z26.VnB(), p2.Merging(), z26.VnB(), z17.VnB()),
- "asr z26.b, p2/m, z26.b, z17.b");
- COMPARE_PREFIX(asr(z26.VnH(), p2.Merging(), z26.VnH(), z17.VnH()),
- "asr z26.h, p2/m, z26.h, z17.h");
- COMPARE_PREFIX(asr(z26.VnS(), p2.Merging(), z26.VnS(), z17.VnS()),
- "asr z26.s, p2/m, z26.s, z17.s");
- COMPARE_PREFIX(asr(z26.VnD(), p2.Merging(), z26.VnD(), z17.VnD()),
- "asr z26.d, p2/m, z26.d, z17.d");
- COMPARE_PREFIX(lslr(z30.VnB(), p1.Merging(), z30.VnB(), z26.VnB()),
- "lslr z30.b, p1/m, z30.b, z26.b");
- COMPARE_PREFIX(lslr(z30.VnH(), p1.Merging(), z30.VnH(), z26.VnH()),
- "lslr z30.h, p1/m, z30.h, z26.h");
- COMPARE_PREFIX(lslr(z30.VnS(), p1.Merging(), z30.VnS(), z26.VnS()),
- "lslr z30.s, p1/m, z30.s, z26.s");
- COMPARE_PREFIX(lslr(z30.VnD(), p1.Merging(), z30.VnD(), z26.VnD()),
- "lslr z30.d, p1/m, z30.d, z26.d");
- COMPARE_PREFIX(lsl(z14.VnB(), p6.Merging(), z14.VnB(), z25.VnB()),
- "lsl z14.b, p6/m, z14.b, z25.b");
- COMPARE_PREFIX(lsl(z14.VnH(), p6.Merging(), z14.VnH(), z25.VnH()),
- "lsl z14.h, p6/m, z14.h, z25.h");
- COMPARE_PREFIX(lsl(z14.VnS(), p6.Merging(), z14.VnS(), z25.VnS()),
- "lsl z14.s, p6/m, z14.s, z25.s");
- COMPARE_PREFIX(lsl(z14.VnD(), p6.Merging(), z14.VnD(), z25.VnD()),
- "lsl z14.d, p6/m, z14.d, z25.d");
- COMPARE_PREFIX(lsrr(z3.VnB(), p1.Merging(), z3.VnB(), z16.VnB()),
- "lsrr z3.b, p1/m, z3.b, z16.b");
- COMPARE_PREFIX(lsrr(z3.VnH(), p1.Merging(), z3.VnH(), z16.VnH()),
- "lsrr z3.h, p1/m, z3.h, z16.h");
- COMPARE_PREFIX(lsrr(z3.VnS(), p1.Merging(), z3.VnS(), z16.VnS()),
- "lsrr z3.s, p1/m, z3.s, z16.s");
- COMPARE_PREFIX(lsrr(z3.VnD(), p1.Merging(), z3.VnD(), z16.VnD()),
- "lsrr z3.d, p1/m, z3.d, z16.d");
- COMPARE_PREFIX(lsr(z29.VnB(), p7.Merging(), z29.VnB(), z13.VnB()),
- "lsr z29.b, p7/m, z29.b, z13.b");
- COMPARE_PREFIX(lsr(z29.VnH(), p7.Merging(), z29.VnH(), z13.VnH()),
- "lsr z29.h, p7/m, z29.h, z13.h");
- COMPARE_PREFIX(lsr(z29.VnS(), p7.Merging(), z29.VnS(), z13.VnS()),
- "lsr z29.s, p7/m, z29.s, z13.s");
- COMPARE_PREFIX(lsr(z29.VnD(), p7.Merging(), z29.VnD(), z13.VnD()),
- "lsr z29.d, p7/m, z29.d, z13.d");
-
- COMPARE_PREFIX(asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnD()),
- "asr z4.b, p0/m, z4.b, z30.d");
- COMPARE_PREFIX(asr(z4.VnH(), p0.Merging(), z4.VnH(), z30.VnD()),
- "asr z4.h, p0/m, z4.h, z30.d");
- COMPARE_PREFIX(asr(z4.VnS(), p0.Merging(), z4.VnS(), z30.VnD()),
- "asr z4.s, p0/m, z4.s, z30.d");
- COMPARE_PREFIX(lsl(z13.VnB(), p7.Merging(), z13.VnB(), z18.VnD()),
- "lsl z13.b, p7/m, z13.b, z18.d");
- COMPARE_PREFIX(lsl(z13.VnH(), p7.Merging(), z13.VnH(), z18.VnD()),
- "lsl z13.h, p7/m, z13.h, z18.d");
- COMPARE_PREFIX(lsl(z13.VnS(), p7.Merging(), z13.VnS(), z18.VnD()),
- "lsl z13.s, p7/m, z13.s, z18.d");
- COMPARE_PREFIX(lsr(z1.VnB(), p4.Merging(), z1.VnB(), z14.VnD()),
- "lsr z1.b, p4/m, z1.b, z14.d");
- COMPARE_PREFIX(lsr(z1.VnH(), p4.Merging(), z1.VnH(), z14.VnD()),
- "lsr z1.h, p4/m, z1.h, z14.d");
- COMPARE_PREFIX(lsr(z1.VnS(), p4.Merging(), z1.VnS(), z14.VnD()),
- "lsr z1.s, p4/m, z1.s, z14.d");
+ COMPARE(asrr(z20.VnB(), p3.Merging(), z20.VnB(), z11.VnB()),
+ "asrr z20.b, p3/m, z20.b, z11.b");
+ COMPARE(asrr(z20.VnH(), p3.Merging(), z20.VnH(), z11.VnH()),
+ "asrr z20.h, p3/m, z20.h, z11.h");
+ COMPARE(asrr(z20.VnS(), p3.Merging(), z20.VnS(), z11.VnS()),
+ "asrr z20.s, p3/m, z20.s, z11.s");
+ COMPARE(asrr(z20.VnD(), p3.Merging(), z20.VnD(), z11.VnD()),
+ "asrr z20.d, p3/m, z20.d, z11.d");
+ COMPARE(asr(z26.VnB(), p2.Merging(), z26.VnB(), z17.VnB()),
+ "asr z26.b, p2/m, z26.b, z17.b");
+ COMPARE(asr(z26.VnH(), p2.Merging(), z26.VnH(), z17.VnH()),
+ "asr z26.h, p2/m, z26.h, z17.h");
+ COMPARE(asr(z26.VnS(), p2.Merging(), z26.VnS(), z17.VnS()),
+ "asr z26.s, p2/m, z26.s, z17.s");
+ COMPARE(asr(z26.VnD(), p2.Merging(), z26.VnD(), z17.VnD()),
+ "asr z26.d, p2/m, z26.d, z17.d");
+ COMPARE(lslr(z30.VnB(), p1.Merging(), z30.VnB(), z26.VnB()),
+ "lslr z30.b, p1/m, z30.b, z26.b");
+ COMPARE(lslr(z30.VnH(), p1.Merging(), z30.VnH(), z26.VnH()),
+ "lslr z30.h, p1/m, z30.h, z26.h");
+ COMPARE(lslr(z30.VnS(), p1.Merging(), z30.VnS(), z26.VnS()),
+ "lslr z30.s, p1/m, z30.s, z26.s");
+ COMPARE(lslr(z30.VnD(), p1.Merging(), z30.VnD(), z26.VnD()),
+ "lslr z30.d, p1/m, z30.d, z26.d");
+ COMPARE(lsl(z14.VnB(), p6.Merging(), z14.VnB(), z25.VnB()),
+ "lsl z14.b, p6/m, z14.b, z25.b");
+ COMPARE(lsl(z14.VnH(), p6.Merging(), z14.VnH(), z25.VnH()),
+ "lsl z14.h, p6/m, z14.h, z25.h");
+ COMPARE(lsl(z14.VnS(), p6.Merging(), z14.VnS(), z25.VnS()),
+ "lsl z14.s, p6/m, z14.s, z25.s");
+ COMPARE(lsl(z14.VnD(), p6.Merging(), z14.VnD(), z25.VnD()),
+ "lsl z14.d, p6/m, z14.d, z25.d");
+ COMPARE(lsrr(z3.VnB(), p1.Merging(), z3.VnB(), z16.VnB()),
+ "lsrr z3.b, p1/m, z3.b, z16.b");
+ COMPARE(lsrr(z3.VnH(), p1.Merging(), z3.VnH(), z16.VnH()),
+ "lsrr z3.h, p1/m, z3.h, z16.h");
+ COMPARE(lsrr(z3.VnS(), p1.Merging(), z3.VnS(), z16.VnS()),
+ "lsrr z3.s, p1/m, z3.s, z16.s");
+ COMPARE(lsrr(z3.VnD(), p1.Merging(), z3.VnD(), z16.VnD()),
+ "lsrr z3.d, p1/m, z3.d, z16.d");
+ COMPARE(lsr(z29.VnB(), p7.Merging(), z29.VnB(), z13.VnB()),
+ "lsr z29.b, p7/m, z29.b, z13.b");
+ COMPARE(lsr(z29.VnH(), p7.Merging(), z29.VnH(), z13.VnH()),
+ "lsr z29.h, p7/m, z29.h, z13.h");
+ COMPARE(lsr(z29.VnS(), p7.Merging(), z29.VnS(), z13.VnS()),
+ "lsr z29.s, p7/m, z29.s, z13.s");
+ COMPARE(lsr(z29.VnD(), p7.Merging(), z29.VnD(), z13.VnD()),
+ "lsr z29.d, p7/m, z29.d, z13.d");
+
+ COMPARE(asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnD()),
+ "asr z4.b, p0/m, z4.b, z30.d");
+ COMPARE(asr(z4.VnH(), p0.Merging(), z4.VnH(), z30.VnD()),
+ "asr z4.h, p0/m, z4.h, z30.d");
+ COMPARE(asr(z4.VnS(), p0.Merging(), z4.VnS(), z30.VnD()),
+ "asr z4.s, p0/m, z4.s, z30.d");
+ COMPARE(lsl(z13.VnB(), p7.Merging(), z13.VnB(), z18.VnD()),
+ "lsl z13.b, p7/m, z13.b, z18.d");
+ COMPARE(lsl(z13.VnH(), p7.Merging(), z13.VnH(), z18.VnD()),
+ "lsl z13.h, p7/m, z13.h, z18.d");
+ COMPARE(lsl(z13.VnS(), p7.Merging(), z13.VnS(), z18.VnD()),
+ "lsl z13.s, p7/m, z13.s, z18.d");
+ COMPARE(lsr(z1.VnB(), p4.Merging(), z1.VnB(), z14.VnD()),
+ "lsr z1.b, p4/m, z1.b, z14.d");
+ COMPARE(lsr(z1.VnH(), p4.Merging(), z1.VnH(), z14.VnD()),
+ "lsr z1.h, p4/m, z1.h, z14.d");
+ COMPARE(lsr(z1.VnS(), p4.Merging(), z1.VnS(), z14.VnD()),
+ "lsr z1.s, p4/m, z1.s, z14.d");
COMPARE_MACRO(Asr(z4.VnB(), p0.Merging(), z4.VnB(), z30.VnB()),
"asr z4.b, p0/m, z4.b, z30.b");
@@ -313,38 +365,34 @@ TEST(sve_bitwise_shift_predicated) {
"movprfx z4.b, p0/m, z10.b\n"
"lsr z4.b, p0/m, z4.b, z14.b");
- COMPARE_PREFIX(asrd(z0.VnB(), p4.Merging(), z0.VnB(), 1),
- "asrd z0.b, p4/m, z0.b, #1");
- COMPARE_PREFIX(asrd(z0.VnH(), p4.Merging(), z0.VnH(), 1),
- "asrd z0.h, p4/m, z0.h, #1");
- COMPARE_PREFIX(asrd(z0.VnS(), p4.Merging(), z0.VnS(), 1),
- "asrd z0.s, p4/m, z0.s, #1");
- COMPARE_PREFIX(asrd(z0.VnD(), p4.Merging(), z0.VnD(), 1),
- "asrd z0.d, p4/m, z0.d, #1");
- COMPARE_PREFIX(asr(z8.VnB(), p7.Merging(), z8.VnB(), 3),
- "asr z8.b, p7/m, z8.b, #3");
- COMPARE_PREFIX(asr(z8.VnH(), p7.Merging(), z8.VnH(), 3),
- "asr z8.h, p7/m, z8.h, #3");
- COMPARE_PREFIX(asr(z8.VnS(), p7.Merging(), z8.VnS(), 3),
- "asr z8.s, p7/m, z8.s, #3");
- COMPARE_PREFIX(asr(z8.VnD(), p7.Merging(), z8.VnD(), 3),
- "asr z8.d, p7/m, z8.d, #3");
- COMPARE_PREFIX(lsl(z29.VnB(), p6.Merging(), z29.VnB(), 0),
- "lsl z29.b, p6/m, z29.b, #0");
- COMPARE_PREFIX(lsl(z29.VnH(), p6.Merging(), z29.VnH(), 5),
- "lsl z29.h, p6/m, z29.h, #5");
- COMPARE_PREFIX(lsl(z29.VnS(), p6.Merging(), z29.VnS(), 0),
- "lsl z29.s, p6/m, z29.s, #0");
- COMPARE_PREFIX(lsl(z29.VnD(), p6.Merging(), z29.VnD(), 63),
- "lsl z29.d, p6/m, z29.d, #63");
- COMPARE_PREFIX(lsr(z24.VnB(), p2.Merging(), z24.VnB(), 8),
- "lsr z24.b, p2/m, z24.b, #8");
- COMPARE_PREFIX(lsr(z24.VnH(), p2.Merging(), z24.VnH(), 16),
- "lsr z24.h, p2/m, z24.h, #16");
- COMPARE_PREFIX(lsr(z24.VnS(), p2.Merging(), z24.VnS(), 32),
- "lsr z24.s, p2/m, z24.s, #32");
- COMPARE_PREFIX(lsr(z24.VnD(), p2.Merging(), z24.VnD(), 64),
- "lsr z24.d, p2/m, z24.d, #64");
+ COMPARE(asrd(z0.VnB(), p4.Merging(), z0.VnB(), 1),
+ "asrd z0.b, p4/m, z0.b, #1");
+ COMPARE(asrd(z0.VnH(), p4.Merging(), z0.VnH(), 1),
+ "asrd z0.h, p4/m, z0.h, #1");
+ COMPARE(asrd(z0.VnS(), p4.Merging(), z0.VnS(), 1),
+ "asrd z0.s, p4/m, z0.s, #1");
+ COMPARE(asrd(z0.VnD(), p4.Merging(), z0.VnD(), 1),
+ "asrd z0.d, p4/m, z0.d, #1");
+ COMPARE(asr(z8.VnB(), p7.Merging(), z8.VnB(), 3), "asr z8.b, p7/m, z8.b, #3");
+ COMPARE(asr(z8.VnH(), p7.Merging(), z8.VnH(), 3), "asr z8.h, p7/m, z8.h, #3");
+ COMPARE(asr(z8.VnS(), p7.Merging(), z8.VnS(), 3), "asr z8.s, p7/m, z8.s, #3");
+ COMPARE(asr(z8.VnD(), p7.Merging(), z8.VnD(), 3), "asr z8.d, p7/m, z8.d, #3");
+ COMPARE(lsl(z29.VnB(), p6.Merging(), z29.VnB(), 0),
+ "lsl z29.b, p6/m, z29.b, #0");
+ COMPARE(lsl(z29.VnH(), p6.Merging(), z29.VnH(), 5),
+ "lsl z29.h, p6/m, z29.h, #5");
+ COMPARE(lsl(z29.VnS(), p6.Merging(), z29.VnS(), 0),
+ "lsl z29.s, p6/m, z29.s, #0");
+ COMPARE(lsl(z29.VnD(), p6.Merging(), z29.VnD(), 63),
+ "lsl z29.d, p6/m, z29.d, #63");
+ COMPARE(lsr(z24.VnB(), p2.Merging(), z24.VnB(), 8),
+ "lsr z24.b, p2/m, z24.b, #8");
+ COMPARE(lsr(z24.VnH(), p2.Merging(), z24.VnH(), 16),
+ "lsr z24.h, p2/m, z24.h, #16");
+ COMPARE(lsr(z24.VnS(), p2.Merging(), z24.VnS(), 32),
+ "lsr z24.s, p2/m, z24.s, #32");
+ COMPARE(lsr(z24.VnD(), p2.Merging(), z24.VnD(), 64),
+ "lsr z24.d, p2/m, z24.d, #64");
COMPARE_MACRO(Asrd(z0.VnB(), p4.Merging(), z8.VnB(), 1),
"movprfx z0.b, p4/m, z8.b\n"
@@ -370,42 +418,39 @@ TEST(sve_bitwise_shift_unpredicated) {
// Test asr with reserved tsz field.
COMPARE_PREFIX(dci(0x04209345), "unimplemented");
- COMPARE_PREFIX(asr(z4.VnB(), z27.VnB(), 1), "asr z4.b, z27.b, #1");
- COMPARE_PREFIX(asr(z5.VnB(), z26.VnB(), 8), "asr z5.b, z26.b, #8");
- COMPARE_PREFIX(asr(z6.VnH(), z25.VnH(), 1), "asr z6.h, z25.h, #1");
- COMPARE_PREFIX(asr(z7.VnH(), z24.VnH(), 16), "asr z7.h, z24.h, #16");
- COMPARE_PREFIX(asr(z8.VnS(), z23.VnS(), 1), "asr z8.s, z23.s, #1");
- COMPARE_PREFIX(asr(z9.VnS(), z22.VnS(), 32), "asr z9.s, z22.s, #32");
- COMPARE_PREFIX(asr(z10.VnD(), z21.VnD(), 1), "asr z10.d, z21.d, #1");
- COMPARE_PREFIX(asr(z11.VnD(), z20.VnD(), 64), "asr z11.d, z20.d, #64");
- COMPARE_PREFIX(lsr(z4.VnB(), z27.VnB(), 3), "lsr z4.b, z27.b, #3");
- COMPARE_PREFIX(lsr(z5.VnB(), z26.VnB(), 7), "lsr z5.b, z26.b, #7");
- COMPARE_PREFIX(lsr(z6.VnH(), z25.VnH(), 8), "lsr z6.h, z25.h, #8");
- COMPARE_PREFIX(lsr(z7.VnH(), z24.VnH(), 15), "lsr z7.h, z24.h, #15");
- COMPARE_PREFIX(lsr(z8.VnS(), z23.VnS(), 14), "lsr z8.s, z23.s, #1");
- COMPARE_PREFIX(lsr(z9.VnS(), z22.VnS(), 31), "lsr z9.s, z22.s, #31");
- COMPARE_PREFIX(lsr(z10.VnD(), z21.VnD(), 30), "lsr z10.d, z21.d, #30");
- COMPARE_PREFIX(lsr(z11.VnD(), z20.VnD(), 63), "lsr z11.d, z20.d, #63");
- COMPARE_PREFIX(lsl(z4.VnB(), z27.VnB(), 4), "lsl z4.b, z27.b, #4");
- COMPARE_PREFIX(lsl(z5.VnB(), z26.VnB(), 6), "lsl z5.b, z26.b, #6");
- COMPARE_PREFIX(lsl(z6.VnH(), z25.VnH(), 10), "lsl z6.h, z25.h, #10");
- COMPARE_PREFIX(lsl(z7.VnH(), z24.VnH(), 14), "lsl z7.h, z24.h, #14");
- COMPARE_PREFIX(lsl(z8.VnS(), z23.VnS(), 21), "lsl z8.s, z23.s, #21");
- COMPARE_PREFIX(lsl(z9.VnS(), z22.VnS(), 30), "lsl z9.s, z22.s, #30");
- COMPARE_PREFIX(lsl(z10.VnD(), z21.VnD(), 44), "lsl z10.d, z21.d, #44");
- COMPARE_PREFIX(lsl(z11.VnD(), z20.VnD(), 62), "lsl z11.d, z20.d, #62");
- COMPARE_PREFIX(asr(z12.VnB(), z8.VnB(), z14.VnD()), "asr z12.b, z8.b, z14.d");
- COMPARE_PREFIX(asr(z14.VnH(), z8.VnH(), z12.VnD()), "asr z14.h, z8.h, z12.d");
- COMPARE_PREFIX(asr(z16.VnS(), z8.VnS(), z10.VnD()), "asr z16.s, z8.s, z10.d");
- COMPARE_PREFIX(lsl(z21.VnB(), z16.VnB(), z15.VnD()),
- "lsl z21.b, z16.b, z15.d");
- COMPARE_PREFIX(lsl(z23.VnH(), z16.VnH(), z13.VnD()),
- "lsl z23.h, z16.h, z13.d");
- COMPARE_PREFIX(lsl(z25.VnS(), z16.VnS(), z11.VnD()),
- "lsl z25.s, z16.s, z11.d");
- COMPARE_PREFIX(lsr(z16.VnB(), z19.VnB(), z2.VnD()), "lsr z16.b, z19.b, z2.d");
- COMPARE_PREFIX(lsr(z18.VnH(), z19.VnH(), z4.VnD()), "lsr z18.h, z19.h, z4.d");
- COMPARE_PREFIX(lsr(z20.VnS(), z19.VnS(), z6.VnD()), "lsr z20.s, z19.s, z6.d");
+ COMPARE(asr(z4.VnB(), z27.VnB(), 1), "asr z4.b, z27.b, #1");
+ COMPARE(asr(z5.VnB(), z26.VnB(), 8), "asr z5.b, z26.b, #8");
+ COMPARE(asr(z6.VnH(), z25.VnH(), 1), "asr z6.h, z25.h, #1");
+ COMPARE(asr(z7.VnH(), z24.VnH(), 16), "asr z7.h, z24.h, #16");
+ COMPARE(asr(z8.VnS(), z23.VnS(), 1), "asr z8.s, z23.s, #1");
+ COMPARE(asr(z9.VnS(), z22.VnS(), 32), "asr z9.s, z22.s, #32");
+ COMPARE(asr(z10.VnD(), z21.VnD(), 1), "asr z10.d, z21.d, #1");
+ COMPARE(asr(z11.VnD(), z20.VnD(), 64), "asr z11.d, z20.d, #64");
+ COMPARE(lsr(z4.VnB(), z27.VnB(), 3), "lsr z4.b, z27.b, #3");
+ COMPARE(lsr(z5.VnB(), z26.VnB(), 7), "lsr z5.b, z26.b, #7");
+ COMPARE(lsr(z6.VnH(), z25.VnH(), 8), "lsr z6.h, z25.h, #8");
+ COMPARE(lsr(z7.VnH(), z24.VnH(), 15), "lsr z7.h, z24.h, #15");
+ COMPARE(lsr(z8.VnS(), z23.VnS(), 14), "lsr z8.s, z23.s, #14");
+ COMPARE(lsr(z9.VnS(), z22.VnS(), 31), "lsr z9.s, z22.s, #31");
+ COMPARE(lsr(z10.VnD(), z21.VnD(), 30), "lsr z10.d, z21.d, #30");
+ COMPARE(lsr(z11.VnD(), z20.VnD(), 63), "lsr z11.d, z20.d, #63");
+ COMPARE(lsl(z4.VnB(), z27.VnB(), 4), "lsl z4.b, z27.b, #4");
+ COMPARE(lsl(z5.VnB(), z26.VnB(), 6), "lsl z5.b, z26.b, #6");
+ COMPARE(lsl(z6.VnH(), z25.VnH(), 10), "lsl z6.h, z25.h, #10");
+ COMPARE(lsl(z7.VnH(), z24.VnH(), 14), "lsl z7.h, z24.h, #14");
+ COMPARE(lsl(z8.VnS(), z23.VnS(), 21), "lsl z8.s, z23.s, #21");
+ COMPARE(lsl(z9.VnS(), z22.VnS(), 30), "lsl z9.s, z22.s, #30");
+ COMPARE(lsl(z10.VnD(), z21.VnD(), 44), "lsl z10.d, z21.d, #44");
+ COMPARE(lsl(z11.VnD(), z20.VnD(), 62), "lsl z11.d, z20.d, #62");
+ COMPARE(asr(z12.VnB(), z8.VnB(), z14.VnD()), "asr z12.b, z8.b, z14.d");
+ COMPARE(asr(z14.VnH(), z8.VnH(), z12.VnD()), "asr z14.h, z8.h, z12.d");
+ COMPARE(asr(z16.VnS(), z8.VnS(), z10.VnD()), "asr z16.s, z8.s, z10.d");
+ COMPARE(lsl(z21.VnB(), z16.VnB(), z15.VnD()), "lsl z21.b, z16.b, z15.d");
+ COMPARE(lsl(z23.VnH(), z16.VnH(), z13.VnD()), "lsl z23.h, z16.h, z13.d");
+ COMPARE(lsl(z25.VnS(), z16.VnS(), z11.VnD()), "lsl z25.s, z16.s, z11.d");
+ COMPARE(lsr(z16.VnB(), z19.VnB(), z2.VnD()), "lsr z16.b, z19.b, z2.d");
+ COMPARE(lsr(z18.VnH(), z19.VnH(), z4.VnD()), "lsr z18.h, z19.h, z4.d");
+ COMPARE(lsr(z20.VnS(), z19.VnS(), z6.VnD()), "lsr z20.s, z19.s, z6.d");
CLEANUP();
}
@@ -825,9 +870,9 @@ TEST(sve_vector_inc_element_count) {
TEST(sve_fp_accumulating_reduction) {
SETUP();
- COMPARE_PREFIX(fadda(h10, p2, h10, z0.VnH()), "fadda h10, p2, h10, z0.h");
- COMPARE_PREFIX(fadda(s10, p2, s10, z0.VnS()), "fadda s10, p2, s10, z0.s");
- COMPARE_PREFIX(fadda(d10, p2, d10, z0.VnD()), "fadda d10, p2, d10, z0.d");
+ COMPARE(fadda(h10, p2, h10, z0.VnH()), "fadda h10, p2, h10, z0.h");
+ COMPARE(fadda(s10, p2, s10, z0.VnS()), "fadda s10, p2, s10, z0.s");
+ COMPARE(fadda(d10, p2, d10, z0.VnD()), "fadda d10, p2, d10, z0.d");
CLEANUP();
}
@@ -835,49 +880,49 @@ TEST(sve_fp_accumulating_reduction) {
TEST(sve_fp_arithmetic_predicated) {
SETUP();
- COMPARE_PREFIX(fdiv(z9.VnH(), p4.Merging(), z9.VnH(), z4.VnH()),
- "fdiv z9.h, p4/m, z9.h, z4.h");
- COMPARE_PREFIX(fdiv(z19.VnS(), p5.Merging(), z19.VnS(), z14.VnS()),
- "fdiv z19.s, p5/m, z19.s, z14.s");
- COMPARE_PREFIX(fdiv(z29.VnD(), p6.Merging(), z29.VnD(), z24.VnD()),
- "fdiv z29.d, p6/m, z29.d, z24.d");
- COMPARE_PREFIX(fdivr(z21.VnH(), p3.Merging(), z21.VnH(), z11.VnH()),
- "fdivr z21.h, p3/m, z21.h, z11.h");
- COMPARE_PREFIX(fdivr(z23.VnS(), p5.Merging(), z23.VnS(), z15.VnS()),
- "fdivr z23.s, p5/m, z23.s, z15.s");
- COMPARE_PREFIX(fdivr(z25.VnD(), p7.Merging(), z25.VnD(), z19.VnD()),
- "fdivr z25.d, p7/m, z25.d, z19.d");
- COMPARE_PREFIX(fmax(z4.VnH(), p1.Merging(), z4.VnH(), z29.VnH()),
- "fmax z4.h, p1/m, z4.h, z29.h");
- COMPARE_PREFIX(fmax(z14.VnS(), p3.Merging(), z14.VnS(), z29.VnS()),
- "fmax z14.s, p3/m, z14.s, z29.s");
- COMPARE_PREFIX(fmax(z24.VnD(), p5.Merging(), z24.VnD(), z29.VnD()),
- "fmax z24.d, p5/m, z24.d, z29.d");
- COMPARE_PREFIX(fmin(z1.VnH(), p2.Merging(), z1.VnH(), z30.VnH()),
- "fmin z1.h, p2/m, z1.h, z30.h");
- COMPARE_PREFIX(fmin(z11.VnS(), p4.Merging(), z11.VnS(), z30.VnS()),
- "fmin z11.s, p4/m, z11.s, z30.s");
- COMPARE_PREFIX(fmin(z21.VnD(), p6.Merging(), z21.VnD(), z30.VnD()),
- "fmin z21.d, p6/m, z21.d, z30.d");
-
- COMPARE_PREFIX(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 0),
- "ftmad z21.h, z21.h, z22.h, #0");
- COMPARE_PREFIX(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 2),
- "ftmad z21.h, z21.h, z22.h, #2");
- COMPARE_PREFIX(ftmad(z2.VnH(), z2.VnH(), z21.VnH(), 7),
- "ftmad z2.h, z2.h, z21.h, #7");
- COMPARE_PREFIX(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 0),
- "ftmad z21.s, z21.s, z22.s, #0");
- COMPARE_PREFIX(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 2),
- "ftmad z21.s, z21.s, z22.s, #2");
- COMPARE_PREFIX(ftmad(z2.VnS(), z2.VnS(), z21.VnS(), 7),
- "ftmad z2.s, z2.s, z21.s, #7");
- COMPARE_PREFIX(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 0),
- "ftmad z21.d, z21.d, z22.d, #0");
- COMPARE_PREFIX(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 2),
- "ftmad z21.d, z21.d, z22.d, #2");
- COMPARE_PREFIX(ftmad(z2.VnD(), z2.VnD(), z21.VnD(), 7),
- "ftmad z2.d, z2.d, z21.d, #7");
+ COMPARE(fdiv(z9.VnH(), p4.Merging(), z9.VnH(), z4.VnH()),
+ "fdiv z9.h, p4/m, z9.h, z4.h");
+ COMPARE(fdiv(z19.VnS(), p5.Merging(), z19.VnS(), z14.VnS()),
+ "fdiv z19.s, p5/m, z19.s, z14.s");
+ COMPARE(fdiv(z29.VnD(), p6.Merging(), z29.VnD(), z24.VnD()),
+ "fdiv z29.d, p6/m, z29.d, z24.d");
+ COMPARE(fdivr(z21.VnH(), p3.Merging(), z21.VnH(), z11.VnH()),
+ "fdivr z21.h, p3/m, z21.h, z11.h");
+ COMPARE(fdivr(z23.VnS(), p5.Merging(), z23.VnS(), z15.VnS()),
+ "fdivr z23.s, p5/m, z23.s, z15.s");
+ COMPARE(fdivr(z25.VnD(), p7.Merging(), z25.VnD(), z19.VnD()),
+ "fdivr z25.d, p7/m, z25.d, z19.d");
+ COMPARE(fmax(z4.VnH(), p1.Merging(), z4.VnH(), z29.VnH()),
+ "fmax z4.h, p1/m, z4.h, z29.h");
+ COMPARE(fmax(z14.VnS(), p3.Merging(), z14.VnS(), z29.VnS()),
+ "fmax z14.s, p3/m, z14.s, z29.s");
+ COMPARE(fmax(z24.VnD(), p5.Merging(), z24.VnD(), z29.VnD()),
+ "fmax z24.d, p5/m, z24.d, z29.d");
+ COMPARE(fmin(z1.VnH(), p2.Merging(), z1.VnH(), z30.VnH()),
+ "fmin z1.h, p2/m, z1.h, z30.h");
+ COMPARE(fmin(z11.VnS(), p4.Merging(), z11.VnS(), z30.VnS()),
+ "fmin z11.s, p4/m, z11.s, z30.s");
+ COMPARE(fmin(z21.VnD(), p6.Merging(), z21.VnD(), z30.VnD()),
+ "fmin z21.d, p6/m, z21.d, z30.d");
+
+ COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 0),
+ "ftmad z21.h, z21.h, z22.h, #0");
+ COMPARE(ftmad(z21.VnH(), z21.VnH(), z22.VnH(), 2),
+ "ftmad z21.h, z21.h, z22.h, #2");
+ COMPARE(ftmad(z2.VnH(), z2.VnH(), z21.VnH(), 7),
+ "ftmad z2.h, z2.h, z21.h, #7");
+ COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 0),
+ "ftmad z21.s, z21.s, z22.s, #0");
+ COMPARE(ftmad(z21.VnS(), z21.VnS(), z22.VnS(), 2),
+ "ftmad z21.s, z21.s, z22.s, #2");
+ COMPARE(ftmad(z2.VnS(), z2.VnS(), z21.VnS(), 7),
+ "ftmad z2.s, z2.s, z21.s, #7");
+ COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 0),
+ "ftmad z21.d, z21.d, z22.d, #0");
+ COMPARE(ftmad(z21.VnD(), z21.VnD(), z22.VnD(), 2),
+ "ftmad z21.d, z21.d, z22.d, #2");
+ COMPARE(ftmad(z2.VnD(), z2.VnD(), z21.VnD(), 7),
+ "ftmad z2.d, z2.d, z21.d, #7");
COMPARE_MACRO(Ftmad(z3.VnH(), z2.VnH(), z1.VnH(), 1),
"movprfx z3, z2\n"
@@ -887,60 +932,60 @@ TEST(sve_fp_arithmetic_predicated) {
"movprfx z6, z4\n"
"ftmad z6.s, z6.s, z31.s, #1");
- COMPARE_PREFIX(fabd(z31.VnH(), p7.Merging(), z31.VnH(), z17.VnH()),
- "fabd z31.h, p7/m, z31.h, z17.h");
- COMPARE_PREFIX(fabd(z31.VnS(), p7.Merging(), z31.VnS(), z17.VnS()),
- "fabd z31.s, p7/m, z31.s, z17.s");
- COMPARE_PREFIX(fabd(z31.VnD(), p7.Merging(), z31.VnD(), z17.VnD()),
- "fabd z31.d, p7/m, z31.d, z17.d");
- COMPARE_PREFIX(fadd(z24.VnH(), p2.Merging(), z24.VnH(), z15.VnH()),
- "fadd z24.h, p2/m, z24.h, z15.h");
- COMPARE_PREFIX(fadd(z24.VnS(), p2.Merging(), z24.VnS(), z15.VnS()),
- "fadd z24.s, p2/m, z24.s, z15.s");
- COMPARE_PREFIX(fadd(z24.VnD(), p2.Merging(), z24.VnD(), z15.VnD()),
- "fadd z24.d, p2/m, z24.d, z15.d");
- COMPARE_PREFIX(fmaxnm(z15.VnH(), p4.Merging(), z15.VnH(), z3.VnH()),
- "fmaxnm z15.h, p4/m, z15.h, z3.h");
- COMPARE_PREFIX(fmaxnm(z15.VnS(), p4.Merging(), z15.VnS(), z3.VnS()),
- "fmaxnm z15.s, p4/m, z15.s, z3.s");
- COMPARE_PREFIX(fmaxnm(z15.VnD(), p4.Merging(), z15.VnD(), z3.VnD()),
- "fmaxnm z15.d, p4/m, z15.d, z3.d");
- COMPARE_PREFIX(fminnm(z19.VnH(), p2.Merging(), z19.VnH(), z29.VnH()),
- "fminnm z19.h, p2/m, z19.h, z29.h");
- COMPARE_PREFIX(fminnm(z19.VnS(), p2.Merging(), z19.VnS(), z29.VnS()),
- "fminnm z19.s, p2/m, z19.s, z29.s");
- COMPARE_PREFIX(fminnm(z19.VnD(), p2.Merging(), z19.VnD(), z29.VnD()),
- "fminnm z19.d, p2/m, z19.d, z29.d");
- COMPARE_PREFIX(fmulx(z30.VnH(), p6.Merging(), z30.VnH(), z20.VnH()),
- "fmulx z30.h, p6/m, z30.h, z20.h");
- COMPARE_PREFIX(fmulx(z30.VnS(), p6.Merging(), z30.VnS(), z20.VnS()),
- "fmulx z30.s, p6/m, z30.s, z20.s");
- COMPARE_PREFIX(fmulx(z30.VnD(), p6.Merging(), z30.VnD(), z20.VnD()),
- "fmulx z30.d, p6/m, z30.d, z20.d");
- COMPARE_PREFIX(fmul(z26.VnH(), p2.Merging(), z26.VnH(), z6.VnH()),
- "fmul z26.h, p2/m, z26.h, z6.h");
- COMPARE_PREFIX(fmul(z26.VnS(), p2.Merging(), z26.VnS(), z6.VnS()),
- "fmul z26.s, p2/m, z26.s, z6.s");
- COMPARE_PREFIX(fmul(z26.VnD(), p2.Merging(), z26.VnD(), z6.VnD()),
- "fmul z26.d, p2/m, z26.d, z6.d");
- COMPARE_PREFIX(fscale(z8.VnH(), p3.Merging(), z8.VnH(), z6.VnH()),
- "fscale z8.h, p3/m, z8.h, z6.h");
- COMPARE_PREFIX(fscale(z8.VnS(), p3.Merging(), z8.VnS(), z6.VnS()),
- "fscale z8.s, p3/m, z8.s, z6.s");
- COMPARE_PREFIX(fscale(z8.VnD(), p3.Merging(), z8.VnD(), z6.VnD()),
- "fscale z8.d, p3/m, z8.d, z6.d");
- COMPARE_PREFIX(fsubr(z16.VnH(), p5.Merging(), z16.VnH(), z15.VnH()),
- "fsubr z16.h, p5/m, z16.h, z15.h");
- COMPARE_PREFIX(fsubr(z16.VnS(), p5.Merging(), z16.VnS(), z15.VnS()),
- "fsubr z16.s, p5/m, z16.s, z15.s");
- COMPARE_PREFIX(fsubr(z16.VnD(), p5.Merging(), z16.VnD(), z15.VnD()),
- "fsubr z16.d, p5/m, z16.d, z15.d");
- COMPARE_PREFIX(fsub(z16.VnH(), p5.Merging(), z16.VnH(), z26.VnH()),
- "fsub z16.h, p5/m, z16.h, z26.h");
- COMPARE_PREFIX(fsub(z16.VnS(), p5.Merging(), z16.VnS(), z26.VnS()),
- "fsub z16.s, p5/m, z16.s, z26.s");
- COMPARE_PREFIX(fsub(z16.VnD(), p5.Merging(), z16.VnD(), z26.VnD()),
- "fsub z16.d, p5/m, z16.d, z26.d");
+ COMPARE(fabd(z31.VnH(), p7.Merging(), z31.VnH(), z17.VnH()),
+ "fabd z31.h, p7/m, z31.h, z17.h");
+ COMPARE(fabd(z31.VnS(), p7.Merging(), z31.VnS(), z17.VnS()),
+ "fabd z31.s, p7/m, z31.s, z17.s");
+ COMPARE(fabd(z31.VnD(), p7.Merging(), z31.VnD(), z17.VnD()),
+ "fabd z31.d, p7/m, z31.d, z17.d");
+ COMPARE(fadd(z24.VnH(), p2.Merging(), z24.VnH(), z15.VnH()),
+ "fadd z24.h, p2/m, z24.h, z15.h");
+ COMPARE(fadd(z24.VnS(), p2.Merging(), z24.VnS(), z15.VnS()),
+ "fadd z24.s, p2/m, z24.s, z15.s");
+ COMPARE(fadd(z24.VnD(), p2.Merging(), z24.VnD(), z15.VnD()),
+ "fadd z24.d, p2/m, z24.d, z15.d");
+ COMPARE(fmaxnm(z15.VnH(), p4.Merging(), z15.VnH(), z3.VnH()),
+ "fmaxnm z15.h, p4/m, z15.h, z3.h");
+ COMPARE(fmaxnm(z15.VnS(), p4.Merging(), z15.VnS(), z3.VnS()),
+ "fmaxnm z15.s, p4/m, z15.s, z3.s");
+ COMPARE(fmaxnm(z15.VnD(), p4.Merging(), z15.VnD(), z3.VnD()),
+ "fmaxnm z15.d, p4/m, z15.d, z3.d");
+ COMPARE(fminnm(z19.VnH(), p2.Merging(), z19.VnH(), z29.VnH()),
+ "fminnm z19.h, p2/m, z19.h, z29.h");
+ COMPARE(fminnm(z19.VnS(), p2.Merging(), z19.VnS(), z29.VnS()),
+ "fminnm z19.s, p2/m, z19.s, z29.s");
+ COMPARE(fminnm(z19.VnD(), p2.Merging(), z19.VnD(), z29.VnD()),
+ "fminnm z19.d, p2/m, z19.d, z29.d");
+ COMPARE(fmulx(z30.VnH(), p6.Merging(), z30.VnH(), z20.VnH()),
+ "fmulx z30.h, p6/m, z30.h, z20.h");
+ COMPARE(fmulx(z30.VnS(), p6.Merging(), z30.VnS(), z20.VnS()),
+ "fmulx z30.s, p6/m, z30.s, z20.s");
+ COMPARE(fmulx(z30.VnD(), p6.Merging(), z30.VnD(), z20.VnD()),
+ "fmulx z30.d, p6/m, z30.d, z20.d");
+ COMPARE(fmul(z26.VnH(), p2.Merging(), z26.VnH(), z6.VnH()),
+ "fmul z26.h, p2/m, z26.h, z6.h");
+ COMPARE(fmul(z26.VnS(), p2.Merging(), z26.VnS(), z6.VnS()),
+ "fmul z26.s, p2/m, z26.s, z6.s");
+ COMPARE(fmul(z26.VnD(), p2.Merging(), z26.VnD(), z6.VnD()),
+ "fmul z26.d, p2/m, z26.d, z6.d");
+ COMPARE(fscale(z8.VnH(), p3.Merging(), z8.VnH(), z6.VnH()),
+ "fscale z8.h, p3/m, z8.h, z6.h");
+ COMPARE(fscale(z8.VnS(), p3.Merging(), z8.VnS(), z6.VnS()),
+ "fscale z8.s, p3/m, z8.s, z6.s");
+ COMPARE(fscale(z8.VnD(), p3.Merging(), z8.VnD(), z6.VnD()),
+ "fscale z8.d, p3/m, z8.d, z6.d");
+ COMPARE(fsubr(z16.VnH(), p5.Merging(), z16.VnH(), z15.VnH()),
+ "fsubr z16.h, p5/m, z16.h, z15.h");
+ COMPARE(fsubr(z16.VnS(), p5.Merging(), z16.VnS(), z15.VnS()),
+ "fsubr z16.s, p5/m, z16.s, z15.s");
+ COMPARE(fsubr(z16.VnD(), p5.Merging(), z16.VnD(), z15.VnD()),
+ "fsubr z16.d, p5/m, z16.d, z15.d");
+ COMPARE(fsub(z16.VnH(), p5.Merging(), z16.VnH(), z26.VnH()),
+ "fsub z16.h, p5/m, z16.h, z26.h");
+ COMPARE(fsub(z16.VnS(), p5.Merging(), z16.VnS(), z26.VnS()),
+ "fsub z16.s, p5/m, z16.s, z26.s");
+ COMPARE(fsub(z16.VnD(), p5.Merging(), z16.VnD(), z26.VnD()),
+ "fsub z16.d, p5/m, z16.d, z26.d");
COMPARE_MACRO(Fsub(z0.VnH(), p0.Merging(), z1.VnH(), z0.VnH()),
"fsubr z0.h, p0/m, z0.h, z1.h");
@@ -1005,54 +1050,54 @@ TEST(sve_fp_arithmetic_predicated) {
"movprfx z7.s, p7/m, z8.s\n"
"fscale z7.s, p7/m, z7.s, z31.s");
- COMPARE_PREFIX(fadd(z18.VnH(), p0.Merging(), z18.VnH(), 0.5),
- "fadd z18.h, p0/m, z18.h, #0.5");
- COMPARE_PREFIX(fadd(z18.VnS(), p0.Merging(), z18.VnS(), 1.0),
- "fadd z18.s, p0/m, z18.s, #1.0");
- COMPARE_PREFIX(fadd(z18.VnD(), p0.Merging(), z18.VnD(), 1.0),
- "fadd z18.d, p0/m, z18.d, #1.0");
- COMPARE_PREFIX(fmaxnm(z6.VnH(), p1.Merging(), z6.VnH(), 0.0),
- "fmaxnm z6.h, p1/m, z6.h, #0.0");
- COMPARE_PREFIX(fmaxnm(z6.VnS(), p1.Merging(), z6.VnS(), 1.0),
- "fmaxnm z6.s, p1/m, z6.s, #1.0");
- COMPARE_PREFIX(fmaxnm(z6.VnD(), p1.Merging(), z6.VnD(), 1.0),
- "fmaxnm z6.d, p1/m, z6.d, #1.0");
- COMPARE_PREFIX(fmax(z8.VnH(), p6.Merging(), z8.VnH(), 0.0),
- "fmax z8.h, p6/m, z8.h, #0.0");
- COMPARE_PREFIX(fmax(z8.VnS(), p6.Merging(), z8.VnS(), 0.0),
- "fmax z8.s, p6/m, z8.s, #0.0");
- COMPARE_PREFIX(fmax(z8.VnD(), p6.Merging(), z8.VnD(), 1.0),
- "fmax z8.d, p6/m, z8.d, #1.0");
- COMPARE_PREFIX(fminnm(z26.VnH(), p0.Merging(), z26.VnH(), 1.0),
- "fminnm z26.h, p0/m, z26.h, #1.0");
- COMPARE_PREFIX(fminnm(z26.VnS(), p0.Merging(), z26.VnS(), 0.0),
- "fminnm z26.s, p0/m, z26.s, #0.0");
- COMPARE_PREFIX(fminnm(z26.VnD(), p0.Merging(), z26.VnD(), 1.0),
- "fminnm z26.d, p0/m, z26.d, #1.0");
- COMPARE_PREFIX(fmin(z22.VnH(), p0.Merging(), z22.VnH(), 1.0),
- "fmin z22.h, p0/m, z22.h, #1.0");
- COMPARE_PREFIX(fmin(z22.VnS(), p0.Merging(), z22.VnS(), 1.0),
- "fmin z22.s, p0/m, z22.s, #1.0");
- COMPARE_PREFIX(fmin(z22.VnD(), p0.Merging(), z22.VnD(), 0.0),
- "fmin z22.d, p0/m, z22.d, #0.0");
- COMPARE_PREFIX(fmul(z21.VnH(), p3.Merging(), z21.VnH(), 0.5),
- "fmul z21.h, p3/m, z21.h, #0.5");
- COMPARE_PREFIX(fmul(z21.VnS(), p3.Merging(), z21.VnS(), 2.0),
- "fmul z21.s, p3/m, z21.s, #2.0");
- COMPARE_PREFIX(fmul(z21.VnD(), p3.Merging(), z21.VnD(), 2.0),
- "fmul z21.d, p3/m, z21.d, #2.0");
- COMPARE_PREFIX(fsubr(z21.VnH(), p3.Merging(), z21.VnH(), 1.0),
- "fsubr z21.h, p3/m, z21.h, #1.0");
- COMPARE_PREFIX(fsubr(z21.VnS(), p3.Merging(), z21.VnS(), 0.5),
- "fsubr z21.s, p3/m, z21.s, #0.5");
- COMPARE_PREFIX(fsubr(z21.VnD(), p3.Merging(), z21.VnD(), 1.0),
- "fsubr z21.d, p3/m, z21.d, #1.0");
- COMPARE_PREFIX(fsub(z26.VnH(), p4.Merging(), z26.VnH(), 0.5),
- "fsub z26.h, p4/m, z26.h, #0.5");
- COMPARE_PREFIX(fsub(z26.VnS(), p4.Merging(), z26.VnS(), 1.0),
- "fsub z26.s, p4/m, z26.s, #1.0");
- COMPARE_PREFIX(fsub(z26.VnD(), p4.Merging(), z26.VnD(), 0.5),
- "fsub z26.d, p4/m, z26.d, #0.5");
+ COMPARE(fadd(z18.VnH(), p0.Merging(), z18.VnH(), 0.5),
+ "fadd z18.h, p0/m, z18.h, #0.5");
+ COMPARE(fadd(z18.VnS(), p0.Merging(), z18.VnS(), 1.0),
+ "fadd z18.s, p0/m, z18.s, #1.0");
+ COMPARE(fadd(z18.VnD(), p0.Merging(), z18.VnD(), 1.0),
+ "fadd z18.d, p0/m, z18.d, #1.0");
+ COMPARE(fmaxnm(z6.VnH(), p1.Merging(), z6.VnH(), 0.0),
+ "fmaxnm z6.h, p1/m, z6.h, #0.0");
+ COMPARE(fmaxnm(z6.VnS(), p1.Merging(), z6.VnS(), 1.0),
+ "fmaxnm z6.s, p1/m, z6.s, #1.0");
+ COMPARE(fmaxnm(z6.VnD(), p1.Merging(), z6.VnD(), 1.0),
+ "fmaxnm z6.d, p1/m, z6.d, #1.0");
+ COMPARE(fmax(z8.VnH(), p6.Merging(), z8.VnH(), 0.0),
+ "fmax z8.h, p6/m, z8.h, #0.0");
+ COMPARE(fmax(z8.VnS(), p6.Merging(), z8.VnS(), 0.0),
+ "fmax z8.s, p6/m, z8.s, #0.0");
+ COMPARE(fmax(z8.VnD(), p6.Merging(), z8.VnD(), 1.0),
+ "fmax z8.d, p6/m, z8.d, #1.0");
+ COMPARE(fminnm(z26.VnH(), p0.Merging(), z26.VnH(), 1.0),
+ "fminnm z26.h, p0/m, z26.h, #1.0");
+ COMPARE(fminnm(z26.VnS(), p0.Merging(), z26.VnS(), 0.0),
+ "fminnm z26.s, p0/m, z26.s, #0.0");
+ COMPARE(fminnm(z26.VnD(), p0.Merging(), z26.VnD(), 1.0),
+ "fminnm z26.d, p0/m, z26.d, #1.0");
+ COMPARE(fmin(z22.VnH(), p0.Merging(), z22.VnH(), 1.0),
+ "fmin z22.h, p0/m, z22.h, #1.0");
+ COMPARE(fmin(z22.VnS(), p0.Merging(), z22.VnS(), 1.0),
+ "fmin z22.s, p0/m, z22.s, #1.0");
+ COMPARE(fmin(z22.VnD(), p0.Merging(), z22.VnD(), 0.0),
+ "fmin z22.d, p0/m, z22.d, #0.0");
+ COMPARE(fmul(z21.VnH(), p3.Merging(), z21.VnH(), 0.5),
+ "fmul z21.h, p3/m, z21.h, #0.5");
+ COMPARE(fmul(z21.VnS(), p3.Merging(), z21.VnS(), 2.0),
+ "fmul z21.s, p3/m, z21.s, #2.0");
+ COMPARE(fmul(z21.VnD(), p3.Merging(), z21.VnD(), 2.0),
+ "fmul z21.d, p3/m, z21.d, #2.0");
+ COMPARE(fsubr(z21.VnH(), p3.Merging(), z21.VnH(), 1.0),
+ "fsubr z21.h, p3/m, z21.h, #1.0");
+ COMPARE(fsubr(z21.VnS(), p3.Merging(), z21.VnS(), 0.5),
+ "fsubr z21.s, p3/m, z21.s, #0.5");
+ COMPARE(fsubr(z21.VnD(), p3.Merging(), z21.VnD(), 1.0),
+ "fsubr z21.d, p3/m, z21.d, #1.0");
+ COMPARE(fsub(z26.VnH(), p4.Merging(), z26.VnH(), 0.5),
+ "fsub z26.h, p4/m, z26.h, #0.5");
+ COMPARE(fsub(z26.VnS(), p4.Merging(), z26.VnS(), 1.0),
+ "fsub z26.s, p4/m, z26.s, #1.0");
+ COMPARE(fsub(z26.VnD(), p4.Merging(), z26.VnD(), 0.5),
+ "fsub z26.d, p4/m, z26.d, #0.5");
COMPARE_MACRO(Fadd(z18.VnH(), p0.Merging(), z8.VnH(), 1.0),
"movprfx z18.h, p0/m, z8.h\n"
@@ -1082,6 +1127,9 @@ TEST(sve_fp_arithmetic_predicated) {
}
TEST(sve_fp_arithmetic_predicated_macro_fast_nan_propagation) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
// Shadow the `MacroAssembler` type so that the test macros work without
// modification.
typedef FastNaNPropagationMacroAssembler MacroAssembler;
@@ -1131,9 +1179,14 @@ TEST(sve_fp_arithmetic_predicated_macro_fast_nan_propagation) {
"fmin z15.d, p6/m, z15.d, z8.d");
CLEANUP();
+
+#pragma GCC diagnostic pop
}
TEST(sve_fp_arithmetic_predicated_macro_strict_nan_propagation) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
// Shadow the `MacroAssembler` type so that the test macros work without
// modification.
typedef StrictNaNPropagationMacroAssembler MacroAssembler;
@@ -1187,47 +1240,37 @@ TEST(sve_fp_arithmetic_predicated_macro_strict_nan_propagation) {
"fmin z15.d, p6/m, z15.d, z8.d");
CLEANUP();
+
+#pragma GCC diagnostic pop
}
TEST(sve_fp_arithmetic_unpredicated) {
SETUP();
- COMPARE_PREFIX(fadd(z5.VnH(), z24.VnH(), z11.VnH()),
- "fadd z5.h, z24.h, z11.h");
- COMPARE_PREFIX(fadd(z15.VnS(), z14.VnS(), z12.VnS()),
- "fadd z15.s, z14.s, z12.s");
- COMPARE_PREFIX(fadd(z25.VnD(), z4.VnD(), z13.VnD()),
- "fadd z25.d, z4.d, z13.d");
- COMPARE_PREFIX(fmul(z9.VnH(), z24.VnH(), z10.VnH()),
- "fmul z9.h, z24.h, z10.h");
- COMPARE_PREFIX(fmul(z19.VnS(), z14.VnS(), z0.VnS()),
- "fmul z19.s, z14.s, z0.s");
- COMPARE_PREFIX(fmul(z29.VnD(), z4.VnD(), z20.VnD()),
- "fmul z29.d, z4.d, z20.d");
- COMPARE_PREFIX(fsub(z4.VnH(), z14.VnH(), z29.VnH()),
- "fsub z4.h, z14.h, z29.h");
- COMPARE_PREFIX(fsub(z14.VnS(), z24.VnS(), z9.VnS()),
- "fsub z14.s, z24.s, z9.s");
- COMPARE_PREFIX(fsub(z14.VnD(), z4.VnD(), z19.VnD()),
- "fsub z14.d, z4.d, z19.d");
- COMPARE_PREFIX(frecps(z14.VnH(), z29.VnH(), z18.VnH()),
- "frecps z14.h, z29.h, z18.h");
- COMPARE_PREFIX(frecps(z14.VnS(), z29.VnS(), z18.VnS()),
- "frecps z14.s, z29.s, z18.s");
- COMPARE_PREFIX(frecps(z14.VnD(), z29.VnD(), z18.VnD()),
- "frecps z14.d, z29.d, z18.d");
- COMPARE_PREFIX(frsqrts(z5.VnH(), z6.VnH(), z28.VnH()),
- "frsqrts z5.h, z6.h, z28.h");
- COMPARE_PREFIX(frsqrts(z5.VnS(), z6.VnS(), z28.VnS()),
- "frsqrts z5.s, z6.s, z28.s");
- COMPARE_PREFIX(frsqrts(z5.VnD(), z6.VnD(), z28.VnD()),
- "frsqrts z5.d, z6.d, z28.d");
- COMPARE_PREFIX(ftsmul(z21.VnH(), z17.VnH(), z24.VnH()),
- "ftsmul z21.h, z17.h, z24.h");
- COMPARE_PREFIX(ftsmul(z21.VnS(), z17.VnS(), z24.VnS()),
- "ftsmul z21.s, z17.s, z24.s");
- COMPARE_PREFIX(ftsmul(z21.VnD(), z17.VnD(), z24.VnD()),
- "ftsmul z21.d, z17.d, z24.d");
+ COMPARE(fadd(z5.VnH(), z24.VnH(), z11.VnH()), "fadd z5.h, z24.h, z11.h");
+ COMPARE(fadd(z15.VnS(), z14.VnS(), z12.VnS()), "fadd z15.s, z14.s, z12.s");
+ COMPARE(fadd(z25.VnD(), z4.VnD(), z13.VnD()), "fadd z25.d, z4.d, z13.d");
+ COMPARE(fmul(z9.VnH(), z24.VnH(), z10.VnH()), "fmul z9.h, z24.h, z10.h");
+ COMPARE(fmul(z19.VnS(), z14.VnS(), z0.VnS()), "fmul z19.s, z14.s, z0.s");
+ COMPARE(fmul(z29.VnD(), z4.VnD(), z20.VnD()), "fmul z29.d, z4.d, z20.d");
+ COMPARE(fsub(z4.VnH(), z14.VnH(), z29.VnH()), "fsub z4.h, z14.h, z29.h");
+ COMPARE(fsub(z14.VnS(), z24.VnS(), z9.VnS()), "fsub z14.s, z24.s, z9.s");
+ COMPARE(fsub(z14.VnD(), z4.VnD(), z19.VnD()), "fsub z14.d, z4.d, z19.d");
+ COMPARE(frecps(z14.VnH(), z29.VnH(), z18.VnH()),
+ "frecps z14.h, z29.h, z18.h");
+ COMPARE(frecps(z14.VnS(), z29.VnS(), z18.VnS()),
+ "frecps z14.s, z29.s, z18.s");
+ COMPARE(frecps(z14.VnD(), z29.VnD(), z18.VnD()),
+ "frecps z14.d, z29.d, z18.d");
+ COMPARE(frsqrts(z5.VnH(), z6.VnH(), z28.VnH()), "frsqrts z5.h, z6.h, z28.h");
+ COMPARE(frsqrts(z5.VnS(), z6.VnS(), z28.VnS()), "frsqrts z5.s, z6.s, z28.s");
+ COMPARE(frsqrts(z5.VnD(), z6.VnD(), z28.VnD()), "frsqrts z5.d, z6.d, z28.d");
+ COMPARE(ftsmul(z21.VnH(), z17.VnH(), z24.VnH()),
+ "ftsmul z21.h, z17.h, z24.h");
+ COMPARE(ftsmul(z21.VnS(), z17.VnS(), z24.VnS()),
+ "ftsmul z21.s, z17.s, z24.s");
+ COMPARE(ftsmul(z21.VnD(), z17.VnD(), z24.VnD()),
+ "ftsmul z21.d, z17.d, z24.d");
CLEANUP();
}
@@ -1236,48 +1279,48 @@ TEST(sve_fp_compare_vectors) {
SETUP();
- COMPARE_PREFIX(facge(p1.VnH(), p3.Zeroing(), z22.VnH(), z25.VnH()),
- "facge p1.h, p3/z, z22.h, z25.h");
- COMPARE_PREFIX(facge(p1.VnS(), p3.Zeroing(), z22.VnS(), z25.VnS()),
- "facge p1.s, p3/z, z22.s, z25.s");
- COMPARE_PREFIX(facge(p1.VnD(), p3.Zeroing(), z22.VnD(), z25.VnD()),
- "facge p1.d, p3/z, z22.d, z25.d");
- COMPARE_PREFIX(facgt(p8.VnH(), p7.Zeroing(), z25.VnH(), z17.VnH()),
- "facgt p8.h, p7/z, z25.h, z17.h");
- COMPARE_PREFIX(facgt(p8.VnS(), p7.Zeroing(), z25.VnS(), z17.VnS()),
- "facgt p8.s, p7/z, z25.s, z17.s");
- COMPARE_PREFIX(facgt(p8.VnD(), p7.Zeroing(), z25.VnD(), z17.VnD()),
- "facgt p8.d, p7/z, z25.d, z17.d");
- COMPARE_PREFIX(fcmeq(p10.VnH(), p2.Zeroing(), z1.VnH(), z17.VnH()),
- "fcmeq p10.h, p2/z, z1.h, z17.h");
- COMPARE_PREFIX(fcmeq(p10.VnS(), p2.Zeroing(), z1.VnS(), z17.VnS()),
- "fcmeq p10.s, p2/z, z1.s, z17.s");
- COMPARE_PREFIX(fcmeq(p10.VnD(), p2.Zeroing(), z1.VnD(), z17.VnD()),
- "fcmeq p10.d, p2/z, z1.d, z17.d");
- COMPARE_PREFIX(fcmge(p0.VnH(), p0.Zeroing(), z1.VnH(), z0.VnH()),
- "fcmge p0.h, p0/z, z1.h, z0.h");
- COMPARE_PREFIX(fcmge(p0.VnS(), p0.Zeroing(), z1.VnS(), z0.VnS()),
- "fcmge p0.s, p0/z, z1.s, z0.s");
- COMPARE_PREFIX(fcmge(p0.VnD(), p0.Zeroing(), z1.VnD(), z0.VnD()),
- "fcmge p0.d, p0/z, z1.d, z0.d");
- COMPARE_PREFIX(fcmgt(p15.VnH(), p5.Zeroing(), z26.VnH(), z5.VnH()),
- "fcmgt p15.h, p5/z, z26.h, z5.h");
- COMPARE_PREFIX(fcmgt(p15.VnS(), p5.Zeroing(), z26.VnS(), z5.VnS()),
- "fcmgt p15.s, p5/z, z26.s, z5.s");
- COMPARE_PREFIX(fcmgt(p15.VnD(), p5.Zeroing(), z26.VnD(), z5.VnD()),
- "fcmgt p15.d, p5/z, z26.d, z5.d");
- COMPARE_PREFIX(fcmne(p2.VnH(), p1.Zeroing(), z9.VnH(), z4.VnH()),
- "fcmne p2.h, p1/z, z9.h, z4.h");
- COMPARE_PREFIX(fcmne(p2.VnS(), p1.Zeroing(), z9.VnS(), z4.VnS()),
- "fcmne p2.s, p1/z, z9.s, z4.s");
- COMPARE_PREFIX(fcmne(p2.VnD(), p1.Zeroing(), z9.VnD(), z4.VnD()),
- "fcmne p2.d, p1/z, z9.d, z4.d");
- COMPARE_PREFIX(fcmuo(p6.VnH(), p4.Zeroing(), z10.VnH(), z21.VnH()),
- "fcmuo p6.h, p4/z, z10.h, z21.h");
- COMPARE_PREFIX(fcmuo(p6.VnS(), p4.Zeroing(), z10.VnS(), z21.VnS()),
- "fcmuo p6.s, p4/z, z10.s, z21.s");
- COMPARE_PREFIX(fcmuo(p6.VnD(), p4.Zeroing(), z10.VnD(), z21.VnD()),
- "fcmuo p6.d, p4/z, z10.d, z21.d");
+ COMPARE(facge(p1.VnH(), p3.Zeroing(), z22.VnH(), z25.VnH()),
+ "facge p1.h, p3/z, z22.h, z25.h");
+ COMPARE(facge(p1.VnS(), p3.Zeroing(), z22.VnS(), z25.VnS()),
+ "facge p1.s, p3/z, z22.s, z25.s");
+ COMPARE(facge(p1.VnD(), p3.Zeroing(), z22.VnD(), z25.VnD()),
+ "facge p1.d, p3/z, z22.d, z25.d");
+ COMPARE(facgt(p8.VnH(), p7.Zeroing(), z25.VnH(), z17.VnH()),
+ "facgt p8.h, p7/z, z25.h, z17.h");
+ COMPARE(facgt(p8.VnS(), p7.Zeroing(), z25.VnS(), z17.VnS()),
+ "facgt p8.s, p7/z, z25.s, z17.s");
+ COMPARE(facgt(p8.VnD(), p7.Zeroing(), z25.VnD(), z17.VnD()),
+ "facgt p8.d, p7/z, z25.d, z17.d");
+ COMPARE(fcmeq(p10.VnH(), p2.Zeroing(), z1.VnH(), z17.VnH()),
+ "fcmeq p10.h, p2/z, z1.h, z17.h");
+ COMPARE(fcmeq(p10.VnS(), p2.Zeroing(), z1.VnS(), z17.VnS()),
+ "fcmeq p10.s, p2/z, z1.s, z17.s");
+ COMPARE(fcmeq(p10.VnD(), p2.Zeroing(), z1.VnD(), z17.VnD()),
+ "fcmeq p10.d, p2/z, z1.d, z17.d");
+ COMPARE(fcmge(p0.VnH(), p0.Zeroing(), z1.VnH(), z0.VnH()),
+ "fcmge p0.h, p0/z, z1.h, z0.h");
+ COMPARE(fcmge(p0.VnS(), p0.Zeroing(), z1.VnS(), z0.VnS()),
+ "fcmge p0.s, p0/z, z1.s, z0.s");
+ COMPARE(fcmge(p0.VnD(), p0.Zeroing(), z1.VnD(), z0.VnD()),
+ "fcmge p0.d, p0/z, z1.d, z0.d");
+ COMPARE(fcmgt(p15.VnH(), p5.Zeroing(), z26.VnH(), z5.VnH()),
+ "fcmgt p15.h, p5/z, z26.h, z5.h");
+ COMPARE(fcmgt(p15.VnS(), p5.Zeroing(), z26.VnS(), z5.VnS()),
+ "fcmgt p15.s, p5/z, z26.s, z5.s");
+ COMPARE(fcmgt(p15.VnD(), p5.Zeroing(), z26.VnD(), z5.VnD()),
+ "fcmgt p15.d, p5/z, z26.d, z5.d");
+ COMPARE(fcmne(p2.VnH(), p1.Zeroing(), z9.VnH(), z4.VnH()),
+ "fcmne p2.h, p1/z, z9.h, z4.h");
+ COMPARE(fcmne(p2.VnS(), p1.Zeroing(), z9.VnS(), z4.VnS()),
+ "fcmne p2.s, p1/z, z9.s, z4.s");
+ COMPARE(fcmne(p2.VnD(), p1.Zeroing(), z9.VnD(), z4.VnD()),
+ "fcmne p2.d, p1/z, z9.d, z4.d");
+ COMPARE(fcmuo(p6.VnH(), p4.Zeroing(), z10.VnH(), z21.VnH()),
+ "fcmuo p6.h, p4/z, z10.h, z21.h");
+ COMPARE(fcmuo(p6.VnS(), p4.Zeroing(), z10.VnS(), z21.VnS()),
+ "fcmuo p6.s, p4/z, z10.s, z21.s");
+ COMPARE(fcmuo(p6.VnD(), p4.Zeroing(), z10.VnD(), z21.VnD()),
+ "fcmuo p6.d, p4/z, z10.d, z21.d");
COMPARE_MACRO(Facle(p2.VnH(), p0.Zeroing(), z11.VnH(), z15.VnH()),
"facge p2.h, p0/z, z15.h, z11.h");
@@ -1311,42 +1354,42 @@ TEST(sve_fp_compare_vectors) {
TEST(sve_fp_compare_with_zero) {
SETUP();
- COMPARE_PREFIX(fcmeq(p9.VnH(), p1.Zeroing(), z17.VnH(), 0),
- "fcmeq p9.h, p1/z, z17.h, #0.0");
- COMPARE_PREFIX(fcmeq(p9.VnS(), p1.Zeroing(), z17.VnS(), 0),
- "fcmeq p9.s, p1/z, z17.s, #0.0");
- COMPARE_PREFIX(fcmeq(p9.VnD(), p1.Zeroing(), z17.VnD(), 0),
- "fcmeq p9.d, p1/z, z17.d, #0.0");
- COMPARE_PREFIX(fcmge(p13.VnH(), p3.Zeroing(), z13.VnH(), 0),
- "fcmge p13.h, p3/z, z13.h, #0.0");
- COMPARE_PREFIX(fcmge(p13.VnS(), p3.Zeroing(), z13.VnS(), 0),
- "fcmge p13.s, p3/z, z13.s, #0.0");
- COMPARE_PREFIX(fcmge(p13.VnD(), p3.Zeroing(), z13.VnD(), 0),
- "fcmge p13.d, p3/z, z13.d, #0.0");
- COMPARE_PREFIX(fcmgt(p10.VnH(), p2.Zeroing(), z24.VnH(), 0),
- "fcmgt p10.h, p2/z, z24.h, #0.0");
- COMPARE_PREFIX(fcmgt(p10.VnS(), p2.Zeroing(), z24.VnS(), 0),
- "fcmgt p10.s, p2/z, z24.s, #0.0");
- COMPARE_PREFIX(fcmgt(p10.VnD(), p2.Zeroing(), z24.VnD(), 0),
- "fcmgt p10.d, p2/z, z24.d, #0.0");
- COMPARE_PREFIX(fcmle(p4.VnH(), p7.Zeroing(), z1.VnH(), 0),
- "fcmle p4.h, p7/z, z1.h, #0.0");
- COMPARE_PREFIX(fcmle(p4.VnS(), p7.Zeroing(), z1.VnS(), 0),
- "fcmle p4.s, p7/z, z1.s, #0.0");
- COMPARE_PREFIX(fcmle(p4.VnD(), p7.Zeroing(), z1.VnD(), 0),
- "fcmle p4.d, p7/z, z1.d, #0.0");
- COMPARE_PREFIX(fcmlt(p15.VnH(), p7.Zeroing(), z9.VnH(), 0),
- "fcmlt p15.h, p7/z, z9.h, #0.0");
- COMPARE_PREFIX(fcmlt(p15.VnS(), p7.Zeroing(), z9.VnS(), 0),
- "fcmlt p15.s, p7/z, z9.s, #0.0");
- COMPARE_PREFIX(fcmlt(p15.VnD(), p7.Zeroing(), z9.VnD(), 0),
- "fcmlt p15.d, p7/z, z9.d, #0.0");
- COMPARE_PREFIX(fcmne(p14.VnH(), p7.Zeroing(), z28.VnH(), 0),
- "fcmne p14.h, p7/z, z28.h, #0.0");
- COMPARE_PREFIX(fcmne(p14.VnS(), p7.Zeroing(), z28.VnS(), 0),
- "fcmne p14.s, p7/z, z28.s, #0.0");
- COMPARE_PREFIX(fcmne(p14.VnD(), p7.Zeroing(), z28.VnD(), 0),
- "fcmne p14.d, p7/z, z28.d, #0.0");
+ COMPARE(fcmeq(p9.VnH(), p1.Zeroing(), z17.VnH(), 0),
+ "fcmeq p9.h, p1/z, z17.h, #0.0");
+ COMPARE(fcmeq(p9.VnS(), p1.Zeroing(), z17.VnS(), 0),
+ "fcmeq p9.s, p1/z, z17.s, #0.0");
+ COMPARE(fcmeq(p9.VnD(), p1.Zeroing(), z17.VnD(), 0),
+ "fcmeq p9.d, p1/z, z17.d, #0.0");
+ COMPARE(fcmge(p13.VnH(), p3.Zeroing(), z13.VnH(), 0),
+ "fcmge p13.h, p3/z, z13.h, #0.0");
+ COMPARE(fcmge(p13.VnS(), p3.Zeroing(), z13.VnS(), 0),
+ "fcmge p13.s, p3/z, z13.s, #0.0");
+ COMPARE(fcmge(p13.VnD(), p3.Zeroing(), z13.VnD(), 0),
+ "fcmge p13.d, p3/z, z13.d, #0.0");
+ COMPARE(fcmgt(p10.VnH(), p2.Zeroing(), z24.VnH(), 0),
+ "fcmgt p10.h, p2/z, z24.h, #0.0");
+ COMPARE(fcmgt(p10.VnS(), p2.Zeroing(), z24.VnS(), 0),
+ "fcmgt p10.s, p2/z, z24.s, #0.0");
+ COMPARE(fcmgt(p10.VnD(), p2.Zeroing(), z24.VnD(), 0),
+ "fcmgt p10.d, p2/z, z24.d, #0.0");
+ COMPARE(fcmle(p4.VnH(), p7.Zeroing(), z1.VnH(), 0),
+ "fcmle p4.h, p7/z, z1.h, #0.0");
+ COMPARE(fcmle(p4.VnS(), p7.Zeroing(), z1.VnS(), 0),
+ "fcmle p4.s, p7/z, z1.s, #0.0");
+ COMPARE(fcmle(p4.VnD(), p7.Zeroing(), z1.VnD(), 0),
+ "fcmle p4.d, p7/z, z1.d, #0.0");
+ COMPARE(fcmlt(p15.VnH(), p7.Zeroing(), z9.VnH(), 0),
+ "fcmlt p15.h, p7/z, z9.h, #0.0");
+ COMPARE(fcmlt(p15.VnS(), p7.Zeroing(), z9.VnS(), 0),
+ "fcmlt p15.s, p7/z, z9.s, #0.0");
+ COMPARE(fcmlt(p15.VnD(), p7.Zeroing(), z9.VnD(), 0),
+ "fcmlt p15.d, p7/z, z9.d, #0.0");
+ COMPARE(fcmne(p14.VnH(), p7.Zeroing(), z28.VnH(), 0),
+ "fcmne p14.h, p7/z, z28.h, #0.0");
+ COMPARE(fcmne(p14.VnS(), p7.Zeroing(), z28.VnS(), 0),
+ "fcmne p14.s, p7/z, z28.s, #0.0");
+ COMPARE(fcmne(p14.VnD(), p7.Zeroing(), z28.VnD(), 0),
+ "fcmne p14.d, p7/z, z28.d, #0.0");
CLEANUP();
}
@@ -1354,18 +1397,18 @@ TEST(sve_fp_compare_with_zero) {
TEST(sve_fp_complex_addition) {
SETUP();
- COMPARE_PREFIX(fcadd(z12.VnH(), p5.Merging(), z12.VnH(), z13.VnH(), 90),
- "fcadd z12.h, p5/m, z12.h, z13.h, #90");
- COMPARE_PREFIX(fcadd(z12.VnS(), p5.Merging(), z12.VnS(), z13.VnS(), 90),
- "fcadd z12.s, p5/m, z12.s, z13.s, #90");
- COMPARE_PREFIX(fcadd(z12.VnD(), p5.Merging(), z12.VnD(), z13.VnD(), 90),
- "fcadd z12.d, p5/m, z12.d, z13.d, #90");
- COMPARE_PREFIX(fcadd(z22.VnH(), p0.Merging(), z22.VnH(), z23.VnH(), 270),
- "fcadd z22.h, p0/m, z22.h, z23.h, #270");
- COMPARE_PREFIX(fcadd(z22.VnS(), p0.Merging(), z22.VnS(), z23.VnS(), 270),
- "fcadd z22.s, p0/m, z22.s, z23.s, #270");
- COMPARE_PREFIX(fcadd(z22.VnD(), p0.Merging(), z22.VnD(), z23.VnD(), 270),
- "fcadd z22.d, p0/m, z22.d, z23.d, #270");
+ COMPARE(fcadd(z12.VnH(), p5.Merging(), z12.VnH(), z13.VnH(), 90),
+ "fcadd z12.h, p5/m, z12.h, z13.h, #90");
+ COMPARE(fcadd(z12.VnS(), p5.Merging(), z12.VnS(), z13.VnS(), 90),
+ "fcadd z12.s, p5/m, z12.s, z13.s, #90");
+ COMPARE(fcadd(z12.VnD(), p5.Merging(), z12.VnD(), z13.VnD(), 90),
+ "fcadd z12.d, p5/m, z12.d, z13.d, #90");
+ COMPARE(fcadd(z22.VnH(), p0.Merging(), z22.VnH(), z23.VnH(), 270),
+ "fcadd z22.h, p0/m, z22.h, z23.h, #270");
+ COMPARE(fcadd(z22.VnS(), p0.Merging(), z22.VnS(), z23.VnS(), 270),
+ "fcadd z22.s, p0/m, z22.s, z23.s, #270");
+ COMPARE(fcadd(z22.VnD(), p0.Merging(), z22.VnD(), z23.VnD(), 270),
+ "fcadd z22.d, p0/m, z22.d, z23.d, #270");
COMPARE_MACRO(Fcadd(z12.VnH(), p5.Merging(), z1.VnH(), z13.VnH(), 90),
"movprfx z12.h, p5/m, z1.h\n"
@@ -1380,19 +1423,92 @@ TEST(sve_fp_complex_addition) {
TEST(sve_fp_complex_mul_add) {
SETUP();
- COMPARE_PREFIX(fcmla(z19.VnH(), p7.Merging(), z16.VnH(), z0.VnH(), 90),
- "fcmla z19.h, p7/m, z16.h, z0.h, #90");
- COMPARE_PREFIX(fcmla(z19.VnS(), p7.Merging(), z16.VnS(), z0.VnS(), 90),
- "fcmla z19.s, p7/m, z16.s, z0.s, #90");
- COMPARE_PREFIX(fcmla(z19.VnD(), p7.Merging(), z16.VnD(), z0.VnD(), 90),
- "fcmla z19.d, p7/m, z16.d, z0.d, #90");
+ COMPARE_MACRO(Fcmla(z19.VnH(),
+ p7.Merging(),
+ z19.VnH(),
+ z16.VnH(),
+ z0.VnH(),
+ 90),
+ "fcmla z19.h, p7/m, z16.h, z0.h, #90");
+ COMPARE_MACRO(Fcmla(z19.VnS(),
+ p7.Merging(),
+ z19.VnS(),
+ z16.VnS(),
+ z0.VnS(),
+ 90),
+ "fcmla z19.s, p7/m, z16.s, z0.s, #90");
+ COMPARE_MACRO(Fcmla(z19.VnD(),
+ p7.Merging(),
+ z19.VnD(),
+ z16.VnD(),
+ z0.VnD(),
+ 90),
+ "fcmla z19.d, p7/m, z16.d, z0.d, #90");
- COMPARE_PREFIX(fcmla(z20.VnD(), p6.Merging(), z15.VnD(), z1.VnD(), 0),
- "fcmla z20.d, p6/m, z15.d, z1.d, #0");
- COMPARE_PREFIX(fcmla(z20.VnD(), p6.Merging(), z15.VnD(), z1.VnD(), 180),
- "fcmla z20.d, p6/m, z15.d, z1.d, #180");
- COMPARE_PREFIX(fcmla(z20.VnD(), p6.Merging(), z15.VnD(), z1.VnD(), 270),
- "fcmla z20.d, p6/m, z15.d, z1.d, #270");
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z20.VnD(),
+ z15.VnD(),
+ z1.VnD(),
+ 0),
+ "fcmla z20.d, p6/m, z15.d, z1.d, #0");
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z20.VnD(),
+ z15.VnD(),
+ z1.VnD(),
+ 180),
+ "fcmla z20.d, p6/m, z15.d, z1.d, #180");
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z20.VnD(),
+ z15.VnD(),
+ z1.VnD(),
+ 270),
+ "fcmla z20.d, p6/m, z15.d, z1.d, #270");
+
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z20.VnD(),
+ z15.VnD(),
+ z20.VnD(),
+ 270),
+ "fcmla z20.d, p6/m, z15.d, z20.d, #270");
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z21.VnD(),
+ z15.VnD(),
+ z1.VnD(),
+ 270),
+ "movprfx z20.d, p6/m, z21.d\n"
+ "fcmla z20.d, p6/m, z15.d, z1.d, #270");
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z21.VnD(),
+ z20.VnD(),
+ z1.VnD(),
+ 270),
+ "movprfx z31, z21\n"
+ "fcmla z31.d, p6/m, z20.d, z1.d, #270\n"
+ "mov z20.d, p6/m, z31.d");
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z21.VnD(),
+ z15.VnD(),
+ z20.VnD(),
+ 270),
+ "movprfx z31, z21\n"
+ "fcmla z31.d, p6/m, z15.d, z20.d, #270\n"
+ "mov z20.d, p6/m, z31.d");
+ COMPARE_MACRO(Fcmla(z20.VnD(),
+ p6.Merging(),
+ z21.VnD(),
+ z20.VnD(),
+ z20.VnD(),
+ 270),
+ "movprfx z31, z21\n"
+ "fcmla z31.d, p6/m, z20.d, z20.d, #270\n"
+ "mov z20.d, p6/m, z31.d");
CLEANUP();
}
@@ -1400,22 +1516,22 @@ TEST(sve_fp_complex_mul_add) {
TEST(sve_fp_complex_mul_add_index) {
SETUP();
- COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 0),
- "fcmla z30.h, z20.h, z3.h[0], #0");
- COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 1, 0),
- "fcmla z30.h, z20.h, z3.h[1], #0");
- COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 2, 90),
- "fcmla z30.h, z20.h, z3.h[2], #90");
- COMPARE_PREFIX(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 270),
- "fcmla z30.h, z20.h, z3.h[0], #270");
- COMPARE_PREFIX(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 0),
- "fcmla z10.s, z20.s, z1.s[0], #0");
- COMPARE_PREFIX(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 0),
- "fcmla z10.s, z20.s, z1.s[1], #0");
- COMPARE_PREFIX(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 90),
- "fcmla z10.s, z20.s, z1.s[1], #90");
- COMPARE_PREFIX(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 270),
- "fcmla z10.s, z20.s, z1.s[0], #270");
+ COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 0),
+ "fcmla z30.h, z20.h, z3.h[0], #0");
+ COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 1, 0),
+ "fcmla z30.h, z20.h, z3.h[1], #0");
+ COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 2, 90),
+ "fcmla z30.h, z20.h, z3.h[2], #90");
+ COMPARE(fcmla(z30.VnH(), z20.VnH(), z3.VnH(), 0, 270),
+ "fcmla z30.h, z20.h, z3.h[0], #270");
+ COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 0),
+ "fcmla z10.s, z20.s, z1.s[0], #0");
+ COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 0),
+ "fcmla z10.s, z20.s, z1.s[1], #0");
+ COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 1, 90),
+ "fcmla z10.s, z20.s, z1.s[1], #90");
+ COMPARE(fcmla(z10.VnS(), z20.VnS(), z1.VnS(), 0, 270),
+ "fcmla z10.s, z20.s, z1.s[0], #270");
CLEANUP();
}
@@ -1423,21 +1539,21 @@ TEST(sve_fp_complex_mul_add_index) {
TEST(sve_fp_fast_reduction) {
SETUP();
- COMPARE_PREFIX(faddv(h26, p6, z16.VnH()), "faddv h26, p6, z16.h");
- COMPARE_PREFIX(faddv(s26, p6, z16.VnS()), "faddv s26, p6, z16.s");
- COMPARE_PREFIX(faddv(d26, p6, z16.VnD()), "faddv d26, p6, z16.d");
- COMPARE_PREFIX(fmaxnmv(h28, p1, z0.VnH()), "fmaxnmv h28, p1, z0.h");
- COMPARE_PREFIX(fmaxnmv(s28, p1, z0.VnS()), "fmaxnmv s28, p1, z0.s");
- COMPARE_PREFIX(fmaxnmv(d28, p1, z0.VnD()), "fmaxnmv d28, p1, z0.d");
- COMPARE_PREFIX(fmaxv(h3, p1, z23.VnH()), "fmaxv h3, p1, z23.h");
- COMPARE_PREFIX(fmaxv(s3, p1, z23.VnS()), "fmaxv s3, p1, z23.s");
- COMPARE_PREFIX(fmaxv(d3, p1, z23.VnD()), "fmaxv d3, p1, z23.d");
- COMPARE_PREFIX(fminnmv(h20, p6, z21.VnH()), "fminnmv h20, p6, z21.h");
- COMPARE_PREFIX(fminnmv(s20, p6, z21.VnS()), "fminnmv s20, p6, z21.s");
- COMPARE_PREFIX(fminnmv(d20, p6, z21.VnD()), "fminnmv d20, p6, z21.d");
- COMPARE_PREFIX(fminv(h10, p4, z27.VnH()), "fminv h10, p4, z27.h");
- COMPARE_PREFIX(fminv(s10, p4, z27.VnS()), "fminv s10, p4, z27.s");
- COMPARE_PREFIX(fminv(d10, p4, z27.VnD()), "fminv d10, p4, z27.d");
+ COMPARE(faddv(h26, p6, z16.VnH()), "faddv h26, p6, z16.h");
+ COMPARE(faddv(s26, p6, z16.VnS()), "faddv s26, p6, z16.s");
+ COMPARE(faddv(d26, p6, z16.VnD()), "faddv d26, p6, z16.d");
+ COMPARE(fmaxnmv(h28, p1, z0.VnH()), "fmaxnmv h28, p1, z0.h");
+ COMPARE(fmaxnmv(s28, p1, z0.VnS()), "fmaxnmv s28, p1, z0.s");
+ COMPARE(fmaxnmv(d28, p1, z0.VnD()), "fmaxnmv d28, p1, z0.d");
+ COMPARE(fmaxv(h3, p1, z23.VnH()), "fmaxv h3, p1, z23.h");
+ COMPARE(fmaxv(s3, p1, z23.VnS()), "fmaxv s3, p1, z23.s");
+ COMPARE(fmaxv(d3, p1, z23.VnD()), "fmaxv d3, p1, z23.d");
+ COMPARE(fminnmv(h20, p6, z21.VnH()), "fminnmv h20, p6, z21.h");
+ COMPARE(fminnmv(s20, p6, z21.VnS()), "fminnmv s20, p6, z21.s");
+ COMPARE(fminnmv(d20, p6, z21.VnD()), "fminnmv d20, p6, z21.d");
+ COMPARE(fminv(h10, p4, z27.VnH()), "fminv h10, p4, z27.h");
+ COMPARE(fminv(s10, p4, z27.VnS()), "fminv s10, p4, z27.s");
+ COMPARE(fminv(d10, p4, z27.VnD()), "fminv d10, p4, z27.d");
CLEANUP();
}
@@ -1445,59 +1561,62 @@ TEST(sve_fp_fast_reduction) {
TEST(sve_fp_mul_add) {
SETUP();
- COMPARE_PREFIX(fmad(z31.VnH(), p2.Merging(), z8.VnH(), z1.VnH()),
- "fmad z31.h, p2/m, z8.h, z1.h");
- COMPARE_PREFIX(fmad(z31.VnS(), p2.Merging(), z8.VnS(), z1.VnS()),
- "fmad z31.s, p2/m, z8.s, z1.s");
- COMPARE_PREFIX(fmad(z31.VnD(), p2.Merging(), z8.VnD(), z1.VnD()),
- "fmad z31.d, p2/m, z8.d, z1.d");
- COMPARE_PREFIX(fmla(z26.VnH(), p7.Merging(), z19.VnH(), z16.VnH()),
- "fmla z26.h, p7/m, z19.h, z16.h");
- COMPARE_PREFIX(fmla(z26.VnS(), p7.Merging(), z19.VnS(), z16.VnS()),
- "fmla z26.s, p7/m, z19.s, z16.s");
- COMPARE_PREFIX(fmla(z26.VnD(), p7.Merging(), z19.VnD(), z16.VnD()),
- "fmla z26.d, p7/m, z19.d, z16.d");
- COMPARE_PREFIX(fmls(z20.VnH(), p6.Merging(), z28.VnH(), z0.VnH()),
- "fmls z20.h, p6/m, z28.h, z0.h");
- COMPARE_PREFIX(fmls(z20.VnS(), p6.Merging(), z28.VnS(), z0.VnS()),
- "fmls z20.s, p6/m, z28.s, z0.s");
- COMPARE_PREFIX(fmls(z20.VnD(), p6.Merging(), z28.VnD(), z0.VnD()),
- "fmls z20.d, p6/m, z28.d, z0.d");
- COMPARE_PREFIX(fmsb(z3.VnH(), p4.Merging(), z8.VnH(), z22.VnH()),
- "fmsb z3.h, p4/m, z8.h, z22.h");
- COMPARE_PREFIX(fmsb(z3.VnS(), p4.Merging(), z8.VnS(), z22.VnS()),
- "fmsb z3.s, p4/m, z8.s, z22.s");
- COMPARE_PREFIX(fmsb(z3.VnD(), p4.Merging(), z8.VnD(), z22.VnD()),
- "fmsb z3.d, p4/m, z8.d, z22.d");
- COMPARE_PREFIX(fnmad(z0.VnH(), p5.Merging(), z20.VnH(), z17.VnH()),
- "fnmad z0.h, p5/m, z20.h, z17.h");
- COMPARE_PREFIX(fnmad(z0.VnS(), p5.Merging(), z20.VnS(), z17.VnS()),
- "fnmad z0.s, p5/m, z20.s, z17.s");
- COMPARE_PREFIX(fnmad(z0.VnD(), p5.Merging(), z20.VnD(), z17.VnD()),
- "fnmad z0.d, p5/m, z20.d, z17.d");
- COMPARE_PREFIX(fnmla(z31.VnH(), p6.Merging(), z14.VnH(), z8.VnH()),
- "fnmla z31.h, p6/m, z14.h, z8.h");
- COMPARE_PREFIX(fnmla(z31.VnS(), p6.Merging(), z14.VnS(), z8.VnS()),
- "fnmla z31.s, p6/m, z14.s, z8.s");
- COMPARE_PREFIX(fnmla(z31.VnD(), p6.Merging(), z14.VnD(), z8.VnD()),
- "fnmla z31.d, p6/m, z14.d, z8.d");
- COMPARE_PREFIX(fnmls(z2.VnH(), p1.Merging(), z23.VnH(), z15.VnH()),
- "fnmls z2.h, p1/m, z23.h, z15.h");
- COMPARE_PREFIX(fnmls(z2.VnS(), p1.Merging(), z23.VnS(), z15.VnS()),
- "fnmls z2.s, p1/m, z23.s, z15.s");
- COMPARE_PREFIX(fnmls(z2.VnD(), p1.Merging(), z23.VnD(), z15.VnD()),
- "fnmls z2.d, p1/m, z23.d, z15.d");
- COMPARE_PREFIX(fnmsb(z28.VnH(), p3.Merging(), z26.VnH(), z11.VnH()),
- "fnmsb z28.h, p3/m, z26.h, z11.h");
- COMPARE_PREFIX(fnmsb(z28.VnS(), p3.Merging(), z26.VnS(), z11.VnS()),
- "fnmsb z28.s, p3/m, z26.s, z11.s");
- COMPARE_PREFIX(fnmsb(z28.VnD(), p3.Merging(), z26.VnD(), z11.VnD()),
- "fnmsb z28.d, p3/m, z26.d, z11.d");
+ COMPARE(fmad(z31.VnH(), p2.Merging(), z8.VnH(), z1.VnH()),
+ "fmad z31.h, p2/m, z8.h, z1.h");
+ COMPARE(fmad(z31.VnS(), p2.Merging(), z8.VnS(), z1.VnS()),
+ "fmad z31.s, p2/m, z8.s, z1.s");
+ COMPARE(fmad(z31.VnD(), p2.Merging(), z8.VnD(), z1.VnD()),
+ "fmad z31.d, p2/m, z8.d, z1.d");
+ COMPARE(fmla(z26.VnH(), p7.Merging(), z19.VnH(), z16.VnH()),
+ "fmla z26.h, p7/m, z19.h, z16.h");
+ COMPARE(fmla(z26.VnS(), p7.Merging(), z19.VnS(), z16.VnS()),
+ "fmla z26.s, p7/m, z19.s, z16.s");
+ COMPARE(fmla(z26.VnD(), p7.Merging(), z19.VnD(), z16.VnD()),
+ "fmla z26.d, p7/m, z19.d, z16.d");
+ COMPARE(fmls(z20.VnH(), p6.Merging(), z28.VnH(), z0.VnH()),
+ "fmls z20.h, p6/m, z28.h, z0.h");
+ COMPARE(fmls(z20.VnS(), p6.Merging(), z28.VnS(), z0.VnS()),
+ "fmls z20.s, p6/m, z28.s, z0.s");
+ COMPARE(fmls(z20.VnD(), p6.Merging(), z28.VnD(), z0.VnD()),
+ "fmls z20.d, p6/m, z28.d, z0.d");
+ COMPARE(fmsb(z3.VnH(), p4.Merging(), z8.VnH(), z22.VnH()),
+ "fmsb z3.h, p4/m, z8.h, z22.h");
+ COMPARE(fmsb(z3.VnS(), p4.Merging(), z8.VnS(), z22.VnS()),
+ "fmsb z3.s, p4/m, z8.s, z22.s");
+ COMPARE(fmsb(z3.VnD(), p4.Merging(), z8.VnD(), z22.VnD()),
+ "fmsb z3.d, p4/m, z8.d, z22.d");
+ COMPARE(fnmad(z0.VnH(), p5.Merging(), z20.VnH(), z17.VnH()),
+ "fnmad z0.h, p5/m, z20.h, z17.h");
+ COMPARE(fnmad(z0.VnS(), p5.Merging(), z20.VnS(), z17.VnS()),
+ "fnmad z0.s, p5/m, z20.s, z17.s");
+ COMPARE(fnmad(z0.VnD(), p5.Merging(), z20.VnD(), z17.VnD()),
+ "fnmad z0.d, p5/m, z20.d, z17.d");
+ COMPARE(fnmla(z31.VnH(), p6.Merging(), z14.VnH(), z8.VnH()),
+ "fnmla z31.h, p6/m, z14.h, z8.h");
+ COMPARE(fnmla(z31.VnS(), p6.Merging(), z14.VnS(), z8.VnS()),
+ "fnmla z31.s, p6/m, z14.s, z8.s");
+ COMPARE(fnmla(z31.VnD(), p6.Merging(), z14.VnD(), z8.VnD()),
+ "fnmla z31.d, p6/m, z14.d, z8.d");
+ COMPARE(fnmls(z2.VnH(), p1.Merging(), z23.VnH(), z15.VnH()),
+ "fnmls z2.h, p1/m, z23.h, z15.h");
+ COMPARE(fnmls(z2.VnS(), p1.Merging(), z23.VnS(), z15.VnS()),
+ "fnmls z2.s, p1/m, z23.s, z15.s");
+ COMPARE(fnmls(z2.VnD(), p1.Merging(), z23.VnD(), z15.VnD()),
+ "fnmls z2.d, p1/m, z23.d, z15.d");
+ COMPARE(fnmsb(z28.VnH(), p3.Merging(), z26.VnH(), z11.VnH()),
+ "fnmsb z28.h, p3/m, z26.h, z11.h");
+ COMPARE(fnmsb(z28.VnS(), p3.Merging(), z26.VnS(), z11.VnS()),
+ "fnmsb z28.s, p3/m, z26.s, z11.s");
+ COMPARE(fnmsb(z28.VnD(), p3.Merging(), z26.VnD(), z11.VnD()),
+ "fnmsb z28.d, p3/m, z26.d, z11.d");
CLEANUP();
}
TEST(sve_fp_mul_add_macro_strict_nan_propagation) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
// Shadow the `MacroAssembler` type so that the test macros work without
// modification.
typedef StrictNaNPropagationMacroAssembler MacroAssembler;
@@ -1553,9 +1672,14 @@ TEST(sve_fp_mul_add_macro_strict_nan_propagation) {
"fnmls z15.d, p0/m, z17.d, z18.d");
CLEANUP();
+
+#pragma GCC diagnostic pop
}
TEST(sve_fp_mul_add_macro_fast_nan_propagation) {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
// Shadow the `MacroAssembler` type so that the test macros work without
// modification.
typedef FastNaNPropagationMacroAssembler MacroAssembler;
@@ -1603,56 +1727,46 @@ TEST(sve_fp_mul_add_macro_fast_nan_propagation) {
"fnmls z15.d, p0/m, z17.d, z18.d");
CLEANUP();
+
+#pragma GCC diagnostic pop
}
TEST(sve_fp_mul_add_index) {
SETUP();
- COMPARE_PREFIX(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 0),
- "fmla z25.d, z9.d, z1.d[0]");
- COMPARE_PREFIX(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 1),
- "fmla z25.d, z9.d, z1.d[1]");
-
- COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 0),
- "fmla z13.h, z7.h, z7.h[0]");
- COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 2),
- "fmla z13.h, z7.h, z7.h[2]");
- COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 5),
- "fmla z13.h, z7.h, z7.h[5]");
- COMPARE_PREFIX(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 7),
- "fmla z13.h, z7.h, z7.h[7]");
-
- COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 0),
- "fmla z17.s, z27.s, z2.s[0]");
- COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 1),
- "fmla z17.s, z27.s, z2.s[1]");
- COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 2),
- "fmla z17.s, z27.s, z2.s[2]");
- COMPARE_PREFIX(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 3),
- "fmla z17.s, z27.s, z2.s[3]");
-
- COMPARE_PREFIX(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 0),
- "fmls z28.d, z2.d, z0.d[0]");
- COMPARE_PREFIX(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 1),
- "fmls z28.d, z2.d, z0.d[1]");
-
- COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 1),
- "fmls z30.h, z29.h, z7.h[1]");
- COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 4),
- "fmls z30.h, z29.h, z7.h[4]");
- COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 3),
- "fmls z30.h, z29.h, z7.h[3]");
- COMPARE_PREFIX(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 6),
- "fmls z30.h, z29.h, z7.h[6]");
-
- COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 0),
- "fmls z30.s, z1.s, z6.s[0]");
- COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 1),
- "fmls z30.s, z1.s, z6.s[1]");
- COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 2),
- "fmls z30.s, z1.s, z6.s[2]");
- COMPARE_PREFIX(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 3),
- "fmls z30.s, z1.s, z6.s[3]");
+ COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 0), "fmla z25.d, z9.d, z1.d[0]");
+ COMPARE(fmla(z25.VnD(), z9.VnD(), z1.VnD(), 1), "fmla z25.d, z9.d, z1.d[1]");
+
+ COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 0), "fmla z13.h, z7.h, z7.h[0]");
+ COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 2), "fmla z13.h, z7.h, z7.h[2]");
+ COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 5), "fmla z13.h, z7.h, z7.h[5]");
+ COMPARE(fmla(z13.VnH(), z7.VnH(), z7.VnH(), 7), "fmla z13.h, z7.h, z7.h[7]");
+
+ COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 0),
+ "fmla z17.s, z27.s, z2.s[0]");
+ COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 1),
+ "fmla z17.s, z27.s, z2.s[1]");
+ COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 2),
+ "fmla z17.s, z27.s, z2.s[2]");
+ COMPARE(fmla(z17.VnS(), z27.VnS(), z2.VnS(), 3),
+ "fmla z17.s, z27.s, z2.s[3]");
+
+ COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 0), "fmls z28.d, z2.d, z0.d[0]");
+ COMPARE(fmls(z28.VnD(), z2.VnD(), z0.VnD(), 1), "fmls z28.d, z2.d, z0.d[1]");
+
+ COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 1),
+ "fmls z30.h, z29.h, z7.h[1]");
+ COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 4),
+ "fmls z30.h, z29.h, z7.h[4]");
+ COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 3),
+ "fmls z30.h, z29.h, z7.h[3]");
+ COMPARE(fmls(z30.VnH(), z29.VnH(), z7.VnH(), 6),
+ "fmls z30.h, z29.h, z7.h[6]");
+
+ COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 0), "fmls z30.s, z1.s, z6.s[0]");
+ COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 1), "fmls z30.s, z1.s, z6.s[1]");
+ COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 2), "fmls z30.s, z1.s, z6.s[2]");
+ COMPARE(fmls(z30.VnS(), z1.VnS(), z6.VnS(), 3), "fmls z30.s, z1.s, z6.s[3]");
COMPARE_MACRO(Fmla(z10.VnH(), z11.VnH(), z12.VnH(), z4.VnH(), 7),
"movprfx z10, z11\n"
@@ -1688,28 +1802,18 @@ TEST(sve_fp_mul_add_index) {
TEST(sve_fp_mul_index) {
SETUP();
- COMPARE_PREFIX(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 0),
- "fmul z12.d, z3.d, z4.d[0]");
- COMPARE_PREFIX(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 1),
- "fmul z12.d, z3.d, z4.d[1]");
+ COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 0), "fmul z12.d, z3.d, z4.d[0]");
+ COMPARE(fmul(z12.VnD(), z3.VnD(), z4.VnD(), 1), "fmul z12.d, z3.d, z4.d[1]");
- COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 0),
- "fmul z22.h, z2.h, z3.h[0]");
- COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 3),
- "fmul z22.h, z2.h, z3.h[3]");
- COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 4),
- "fmul z22.h, z2.h, z3.h[4]");
- COMPARE_PREFIX(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 7),
- "fmul z22.h, z2.h, z3.h[7]");
+ COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 0), "fmul z22.h, z2.h, z3.h[0]");
+ COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 3), "fmul z22.h, z2.h, z3.h[3]");
+ COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 4), "fmul z22.h, z2.h, z3.h[4]");
+ COMPARE(fmul(z22.VnH(), z2.VnH(), z3.VnH(), 7), "fmul z22.h, z2.h, z3.h[7]");
- COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 0),
- "fmul z2.s, z8.s, z7.s[0]");
- COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 1),
- "fmul z2.s, z8.s, z7.s[1]");
- COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 2),
- "fmul z2.s, z8.s, z7.s[2]");
- COMPARE_PREFIX(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 3),
- "fmul z2.s, z8.s, z7.s[3]");
+ COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 0), "fmul z2.s, z8.s, z7.s[0]");
+ COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 1), "fmul z2.s, z8.s, z7.s[1]");
+ COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 2), "fmul z2.s, z8.s, z7.s[2]");
+ COMPARE(fmul(z2.VnS(), z8.VnS(), z7.VnS(), 3), "fmul z2.s, z8.s, z7.s[3]");
CLEANUP();
}
@@ -1717,128 +1821,114 @@ TEST(sve_fp_mul_index) {
TEST(sve_fp_unary_op_predicated) {
SETUP();
- COMPARE_PREFIX(fcvtzs(z29.VnS(), p5.Merging(), z8.VnD()),
- "fcvtzs z29.s, p5/m, z8.d");
- COMPARE_PREFIX(fcvtzs(z30.VnD(), p5.Merging(), z8.VnD()),
- "fcvtzs z30.d, p5/m, z8.d");
- COMPARE_PREFIX(fcvtzs(z14.VnH(), p1.Merging(), z29.VnH()),
- "fcvtzs z14.h, p1/m, z29.h");
- COMPARE_PREFIX(fcvtzs(z11.VnS(), p3.Merging(), z16.VnH()),
- "fcvtzs z11.s, p3/m, z16.h");
- COMPARE_PREFIX(fcvtzs(z4.VnD(), p7.Merging(), z4.VnH()),
- "fcvtzs z4.d, p7/m, z4.h");
- COMPARE_PREFIX(fcvtzs(z24.VnS(), p1.Merging(), z4.VnS()),
- "fcvtzs z24.s, p1/m, z4.s");
- COMPARE_PREFIX(fcvtzs(z25.VnD(), p4.Merging(), z24.VnS()),
- "fcvtzs z25.d, p4/m, z24.s");
- COMPARE_PREFIX(fcvtzu(z16.VnS(), p7.Merging(), z14.VnD()),
- "fcvtzu z16.s, p7/m, z14.d");
- COMPARE_PREFIX(fcvtzu(z31.VnD(), p1.Merging(), z16.VnD()),
- "fcvtzu z31.d, p1/m, z16.d");
- COMPARE_PREFIX(fcvtzu(z12.VnH(), p2.Merging(), z27.VnH()),
- "fcvtzu z12.h, p2/m, z27.h");
- COMPARE_PREFIX(fcvtzu(z26.VnS(), p6.Merging(), z29.VnH()),
- "fcvtzu z26.s, p6/m, z29.h");
- COMPARE_PREFIX(fcvtzu(z29.VnD(), p5.Merging(), z27.VnH()),
- "fcvtzu z29.d, p5/m, z27.h");
- COMPARE_PREFIX(fcvtzu(z13.VnS(), p2.Merging(), z17.VnS()),
- "fcvtzu z13.s, p2/m, z17.s");
- COMPARE_PREFIX(fcvtzu(z25.VnD(), p7.Merging(), z28.VnS()),
- "fcvtzu z25.d, p7/m, z28.s");
- COMPARE_PREFIX(scvtf(z16.VnH(), p6.Merging(), z5.VnH()),
- "scvtf z16.h, p6/m, z5.h");
- COMPARE_PREFIX(scvtf(z31.VnD(), p5.Merging(), z26.VnS()),
- "scvtf z31.d, p5/m, z26.s");
- COMPARE_PREFIX(scvtf(z0.VnH(), p7.Merging(), z0.VnS()),
- "scvtf z0.h, p7/m, z0.s");
- COMPARE_PREFIX(scvtf(z12.VnS(), p7.Merging(), z0.VnS()),
- "scvtf z12.s, p7/m, z0.s");
- COMPARE_PREFIX(scvtf(z17.VnD(), p1.Merging(), z17.VnD()),
- "scvtf z17.d, p1/m, z17.d");
- COMPARE_PREFIX(scvtf(z2.VnH(), p0.Merging(), z9.VnD()),
- "scvtf z2.h, p0/m, z9.d");
- COMPARE_PREFIX(scvtf(z26.VnS(), p5.Merging(), z4.VnD()),
- "scvtf z26.s, p5/m, z4.d");
- COMPARE_PREFIX(ucvtf(z27.VnH(), p4.Merging(), z25.VnH()),
- "ucvtf z27.h, p4/m, z25.h");
- COMPARE_PREFIX(ucvtf(z3.VnD(), p4.Merging(), z3.VnS()),
- "ucvtf z3.d, p4/m, z3.s");
- COMPARE_PREFIX(ucvtf(z24.VnH(), p2.Merging(), z29.VnS()),
- "ucvtf z24.h, p2/m, z29.s");
- COMPARE_PREFIX(ucvtf(z29.VnS(), p5.Merging(), z14.VnS()),
- "ucvtf z29.s, p5/m, z14.s");
- COMPARE_PREFIX(ucvtf(z7.VnD(), p2.Merging(), z14.VnD()),
- "ucvtf z7.d, p2/m, z14.d");
- COMPARE_PREFIX(ucvtf(z20.VnH(), p2.Merging(), z14.VnD()),
- "ucvtf z20.h, p2/m, z14.d");
- COMPARE_PREFIX(ucvtf(z26.VnS(), p1.Merging(), z18.VnD()),
- "ucvtf z26.s, p1/m, z18.d");
- COMPARE_PREFIX(frinta(z11.VnH(), p0.Merging(), z3.VnH()),
- "frinta z11.h, p0/m, z3.h");
- COMPARE_PREFIX(frinta(z11.VnS(), p0.Merging(), z3.VnS()),
- "frinta z11.s, p0/m, z3.s");
- COMPARE_PREFIX(frinta(z11.VnD(), p0.Merging(), z3.VnD()),
- "frinta z11.d, p0/m, z3.d");
- COMPARE_PREFIX(frinti(z17.VnH(), p0.Merging(), z16.VnH()),
- "frinti z17.h, p0/m, z16.h");
- COMPARE_PREFIX(frinti(z17.VnS(), p0.Merging(), z16.VnS()),
- "frinti z17.s, p0/m, z16.s");
- COMPARE_PREFIX(frinti(z17.VnD(), p0.Merging(), z16.VnD()),
- "frinti z17.d, p0/m, z16.d");
- COMPARE_PREFIX(frintm(z2.VnH(), p7.Merging(), z15.VnH()),
- "frintm z2.h, p7/m, z15.h");
- COMPARE_PREFIX(frintm(z2.VnS(), p7.Merging(), z15.VnS()),
- "frintm z2.s, p7/m, z15.s");
- COMPARE_PREFIX(frintm(z2.VnD(), p7.Merging(), z15.VnD()),
- "frintm z2.d, p7/m, z15.d");
- COMPARE_PREFIX(frintn(z14.VnH(), p5.Merging(), z18.VnH()),
- "frintn z14.h, p5/m, z18.h");
- COMPARE_PREFIX(frintn(z14.VnS(), p5.Merging(), z18.VnS()),
- "frintn z14.s, p5/m, z18.s");
- COMPARE_PREFIX(frintn(z14.VnD(), p5.Merging(), z18.VnD()),
- "frintn z14.d, p5/m, z18.d");
- COMPARE_PREFIX(frintp(z20.VnH(), p6.Merging(), z23.VnH()),
- "frintp z20.h, p6/m, z23.h");
- COMPARE_PREFIX(frintp(z20.VnS(), p6.Merging(), z23.VnS()),
- "frintp z20.s, p6/m, z23.s");
- COMPARE_PREFIX(frintp(z20.VnD(), p6.Merging(), z23.VnD()),
- "frintp z20.d, p6/m, z23.d");
- COMPARE_PREFIX(frintx(z2.VnH(), p6.Merging(), z18.VnH()),
- "frintx z2.h, p6/m, z18.h");
- COMPARE_PREFIX(frintx(z2.VnS(), p6.Merging(), z18.VnS()),
- "frintx z2.s, p6/m, z18.s");
- COMPARE_PREFIX(frintx(z2.VnD(), p6.Merging(), z18.VnD()),
- "frintx z2.d, p6/m, z18.d");
- COMPARE_PREFIX(frintz(z26.VnH(), p7.Merging(), z25.VnH()),
- "frintz z26.h, p7/m, z25.h");
- COMPARE_PREFIX(frintz(z26.VnS(), p7.Merging(), z25.VnS()),
- "frintz z26.s, p7/m, z25.s");
- COMPARE_PREFIX(frintz(z26.VnD(), p7.Merging(), z25.VnD()),
- "frintz z26.d, p7/m, z25.d");
- COMPARE_PREFIX(fcvt(z5.VnH(), p2.Merging(), z11.VnD()),
- "fcvt z5.h, p2/m, z11.d");
- COMPARE_PREFIX(fcvt(z30.VnS(), p7.Merging(), z0.VnD()),
- "fcvt z30.s, p7/m, z0.d");
- COMPARE_PREFIX(fcvt(z10.VnD(), p0.Merging(), z17.VnH()),
- "fcvt z10.d, p0/m, z17.h");
- COMPARE_PREFIX(fcvt(z28.VnS(), p3.Merging(), z27.VnH()),
- "fcvt z28.s, p3/m, z27.h");
- COMPARE_PREFIX(fcvt(z9.VnD(), p7.Merging(), z0.VnS()),
- "fcvt z9.d, p7/m, z0.s");
- COMPARE_PREFIX(fcvt(z27.VnH(), p7.Merging(), z9.VnS()),
- "fcvt z27.h, p7/m, z9.s");
- COMPARE_PREFIX(frecpx(z16.VnH(), p1.Merging(), z29.VnH()),
- "frecpx z16.h, p1/m, z29.h");
- COMPARE_PREFIX(frecpx(z16.VnS(), p1.Merging(), z29.VnS()),
- "frecpx z16.s, p1/m, z29.s");
- COMPARE_PREFIX(frecpx(z16.VnD(), p1.Merging(), z29.VnD()),
- "frecpx z16.d, p1/m, z29.d");
- COMPARE_PREFIX(fsqrt(z30.VnH(), p3.Merging(), z13.VnH()),
- "fsqrt z30.h, p3/m, z13.h");
- COMPARE_PREFIX(fsqrt(z30.VnS(), p3.Merging(), z13.VnS()),
- "fsqrt z30.s, p3/m, z13.s");
- COMPARE_PREFIX(fsqrt(z30.VnD(), p3.Merging(), z13.VnD()),
- "fsqrt z30.d, p3/m, z13.d");
+ COMPARE(fcvtzs(z29.VnS(), p5.Merging(), z8.VnD()),
+ "fcvtzs z29.s, p5/m, z8.d");
+ COMPARE(fcvtzs(z30.VnD(), p5.Merging(), z8.VnD()),
+ "fcvtzs z30.d, p5/m, z8.d");
+ COMPARE(fcvtzs(z14.VnH(), p1.Merging(), z29.VnH()),
+ "fcvtzs z14.h, p1/m, z29.h");
+ COMPARE(fcvtzs(z11.VnS(), p3.Merging(), z16.VnH()),
+ "fcvtzs z11.s, p3/m, z16.h");
+ COMPARE(fcvtzs(z4.VnD(), p7.Merging(), z4.VnH()), "fcvtzs z4.d, p7/m, z4.h");
+ COMPARE(fcvtzs(z24.VnS(), p1.Merging(), z4.VnS()),
+ "fcvtzs z24.s, p1/m, z4.s");
+ COMPARE(fcvtzs(z25.VnD(), p4.Merging(), z24.VnS()),
+ "fcvtzs z25.d, p4/m, z24.s");
+ COMPARE(fcvtzu(z16.VnS(), p7.Merging(), z14.VnD()),
+ "fcvtzu z16.s, p7/m, z14.d");
+ COMPARE(fcvtzu(z31.VnD(), p1.Merging(), z16.VnD()),
+ "fcvtzu z31.d, p1/m, z16.d");
+ COMPARE(fcvtzu(z12.VnH(), p2.Merging(), z27.VnH()),
+ "fcvtzu z12.h, p2/m, z27.h");
+ COMPARE(fcvtzu(z26.VnS(), p6.Merging(), z29.VnH()),
+ "fcvtzu z26.s, p6/m, z29.h");
+ COMPARE(fcvtzu(z29.VnD(), p5.Merging(), z27.VnH()),
+ "fcvtzu z29.d, p5/m, z27.h");
+ COMPARE(fcvtzu(z13.VnS(), p2.Merging(), z17.VnS()),
+ "fcvtzu z13.s, p2/m, z17.s");
+ COMPARE(fcvtzu(z25.VnD(), p7.Merging(), z28.VnS()),
+ "fcvtzu z25.d, p7/m, z28.s");
+ COMPARE(scvtf(z16.VnH(), p6.Merging(), z5.VnH()), "scvtf z16.h, p6/m, z5.h");
+ COMPARE(scvtf(z31.VnD(), p5.Merging(), z26.VnS()),
+ "scvtf z31.d, p5/m, z26.s");
+ COMPARE(scvtf(z0.VnH(), p7.Merging(), z0.VnS()), "scvtf z0.h, p7/m, z0.s");
+ COMPARE(scvtf(z12.VnS(), p7.Merging(), z0.VnS()), "scvtf z12.s, p7/m, z0.s");
+ COMPARE(scvtf(z17.VnD(), p1.Merging(), z17.VnD()),
+ "scvtf z17.d, p1/m, z17.d");
+ COMPARE(scvtf(z2.VnH(), p0.Merging(), z9.VnD()), "scvtf z2.h, p0/m, z9.d");
+ COMPARE(scvtf(z26.VnS(), p5.Merging(), z4.VnD()), "scvtf z26.s, p5/m, z4.d");
+ COMPARE(ucvtf(z27.VnH(), p4.Merging(), z25.VnH()),
+ "ucvtf z27.h, p4/m, z25.h");
+ COMPARE(ucvtf(z3.VnD(), p4.Merging(), z3.VnS()), "ucvtf z3.d, p4/m, z3.s");
+ COMPARE(ucvtf(z24.VnH(), p2.Merging(), z29.VnS()),
+ "ucvtf z24.h, p2/m, z29.s");
+ COMPARE(ucvtf(z29.VnS(), p5.Merging(), z14.VnS()),
+ "ucvtf z29.s, p5/m, z14.s");
+ COMPARE(ucvtf(z7.VnD(), p2.Merging(), z14.VnD()), "ucvtf z7.d, p2/m, z14.d");
+ COMPARE(ucvtf(z20.VnH(), p2.Merging(), z14.VnD()),
+ "ucvtf z20.h, p2/m, z14.d");
+ COMPARE(ucvtf(z26.VnS(), p1.Merging(), z18.VnD()),
+ "ucvtf z26.s, p1/m, z18.d");
+ COMPARE(frinta(z11.VnH(), p0.Merging(), z3.VnH()),
+ "frinta z11.h, p0/m, z3.h");
+ COMPARE(frinta(z11.VnS(), p0.Merging(), z3.VnS()),
+ "frinta z11.s, p0/m, z3.s");
+ COMPARE(frinta(z11.VnD(), p0.Merging(), z3.VnD()),
+ "frinta z11.d, p0/m, z3.d");
+ COMPARE(frinti(z17.VnH(), p0.Merging(), z16.VnH()),
+ "frinti z17.h, p0/m, z16.h");
+ COMPARE(frinti(z17.VnS(), p0.Merging(), z16.VnS()),
+ "frinti z17.s, p0/m, z16.s");
+ COMPARE(frinti(z17.VnD(), p0.Merging(), z16.VnD()),
+ "frinti z17.d, p0/m, z16.d");
+ COMPARE(frintm(z2.VnH(), p7.Merging(), z15.VnH()),
+ "frintm z2.h, p7/m, z15.h");
+ COMPARE(frintm(z2.VnS(), p7.Merging(), z15.VnS()),
+ "frintm z2.s, p7/m, z15.s");
+ COMPARE(frintm(z2.VnD(), p7.Merging(), z15.VnD()),
+ "frintm z2.d, p7/m, z15.d");
+ COMPARE(frintn(z14.VnH(), p5.Merging(), z18.VnH()),
+ "frintn z14.h, p5/m, z18.h");
+ COMPARE(frintn(z14.VnS(), p5.Merging(), z18.VnS()),
+ "frintn z14.s, p5/m, z18.s");
+ COMPARE(frintn(z14.VnD(), p5.Merging(), z18.VnD()),
+ "frintn z14.d, p5/m, z18.d");
+ COMPARE(frintp(z20.VnH(), p6.Merging(), z23.VnH()),
+ "frintp z20.h, p6/m, z23.h");
+ COMPARE(frintp(z20.VnS(), p6.Merging(), z23.VnS()),
+ "frintp z20.s, p6/m, z23.s");
+ COMPARE(frintp(z20.VnD(), p6.Merging(), z23.VnD()),
+ "frintp z20.d, p6/m, z23.d");
+ COMPARE(frintx(z2.VnH(), p6.Merging(), z18.VnH()),
+ "frintx z2.h, p6/m, z18.h");
+ COMPARE(frintx(z2.VnS(), p6.Merging(), z18.VnS()),
+ "frintx z2.s, p6/m, z18.s");
+ COMPARE(frintx(z2.VnD(), p6.Merging(), z18.VnD()),
+ "frintx z2.d, p6/m, z18.d");
+ COMPARE(frintz(z26.VnH(), p7.Merging(), z25.VnH()),
+ "frintz z26.h, p7/m, z25.h");
+ COMPARE(frintz(z26.VnS(), p7.Merging(), z25.VnS()),
+ "frintz z26.s, p7/m, z25.s");
+ COMPARE(frintz(z26.VnD(), p7.Merging(), z25.VnD()),
+ "frintz z26.d, p7/m, z25.d");
+ COMPARE(fcvt(z5.VnH(), p2.Merging(), z11.VnD()), "fcvt z5.h, p2/m, z11.d");
+ COMPARE(fcvt(z30.VnS(), p7.Merging(), z0.VnD()), "fcvt z30.s, p7/m, z0.d");
+ COMPARE(fcvt(z10.VnD(), p0.Merging(), z17.VnH()), "fcvt z10.d, p0/m, z17.h");
+ COMPARE(fcvt(z28.VnS(), p3.Merging(), z27.VnH()), "fcvt z28.s, p3/m, z27.h");
+ COMPARE(fcvt(z9.VnD(), p7.Merging(), z0.VnS()), "fcvt z9.d, p7/m, z0.s");
+ COMPARE(fcvt(z27.VnH(), p7.Merging(), z9.VnS()), "fcvt z27.h, p7/m, z9.s");
+ COMPARE(frecpx(z16.VnH(), p1.Merging(), z29.VnH()),
+ "frecpx z16.h, p1/m, z29.h");
+ COMPARE(frecpx(z16.VnS(), p1.Merging(), z29.VnS()),
+ "frecpx z16.s, p1/m, z29.s");
+ COMPARE(frecpx(z16.VnD(), p1.Merging(), z29.VnD()),
+ "frecpx z16.d, p1/m, z29.d");
+ COMPARE(fsqrt(z30.VnH(), p3.Merging(), z13.VnH()),
+ "fsqrt z30.h, p3/m, z13.h");
+ COMPARE(fsqrt(z30.VnS(), p3.Merging(), z13.VnS()),
+ "fsqrt z30.s, p3/m, z13.s");
+ COMPARE(fsqrt(z30.VnD(), p3.Merging(), z13.VnD()),
+ "fsqrt z30.d, p3/m, z13.d");
CLEANUP();
}
@@ -1910,12 +2000,12 @@ TEST(sve_fp_unary_op_predicated_macro) {
TEST(sve_fp_unary_op_unpredicated) {
SETUP();
- COMPARE_PREFIX(frecpe(z0.VnH(), z2.VnH()), "frecpe z0.h, z2.h");
- COMPARE_PREFIX(frecpe(z0.VnS(), z2.VnS()), "frecpe z0.s, z2.s");
- COMPARE_PREFIX(frecpe(z0.VnD(), z2.VnD()), "frecpe z0.d, z2.d");
- COMPARE_PREFIX(frsqrte(z27.VnH(), z14.VnH()), "frsqrte z27.h, z14.h");
- COMPARE_PREFIX(frsqrte(z27.VnS(), z14.VnS()), "frsqrte z27.s, z14.s");
- COMPARE_PREFIX(frsqrte(z27.VnD(), z14.VnD()), "frsqrte z27.d, z14.d");
+ COMPARE(frecpe(z0.VnH(), z2.VnH()), "frecpe z0.h, z2.h");
+ COMPARE(frecpe(z0.VnS(), z2.VnS()), "frecpe z0.s, z2.s");
+ COMPARE(frecpe(z0.VnD(), z2.VnD()), "frecpe z0.d, z2.d");
+ COMPARE(frsqrte(z27.VnH(), z14.VnH()), "frsqrte z27.h, z14.h");
+ COMPARE(frsqrte(z27.VnS(), z14.VnS()), "frsqrte z27.s, z14.s");
+ COMPARE(frsqrte(z27.VnD(), z14.VnD()), "frsqrte z27.d, z14.d");
CLEANUP();
}
@@ -1923,64 +2013,64 @@ TEST(sve_fp_unary_op_unpredicated) {
TEST(sve_inc_dec_by_predicate_count) {
SETUP();
- COMPARE_PREFIX(decp(x17, p0.VnB()), "decp x17, p0.b");
- COMPARE_PREFIX(decp(x17, p0.VnH()), "decp x17, p0.h");
- COMPARE_PREFIX(decp(x17, p0.VnS()), "decp x17, p0.s");
- COMPARE_PREFIX(decp(x17, p0.VnD()), "decp x17, p0.d");
- COMPARE_PREFIX(decp(z2.VnH(), p11), "decp z2.h, p11");
- COMPARE_PREFIX(decp(z2.VnS(), p11), "decp z2.s, p11");
- COMPARE_PREFIX(decp(z2.VnD(), p11), "decp z2.d, p11");
- COMPARE_PREFIX(incp(x26, p8.VnB()), "incp x26, p8.b");
- COMPARE_PREFIX(incp(x26, p8.VnH()), "incp x26, p8.h");
- COMPARE_PREFIX(incp(x26, p8.VnS()), "incp x26, p8.s");
- COMPARE_PREFIX(incp(x26, p8.VnD()), "incp x26, p8.d");
- COMPARE_PREFIX(incp(z27.VnH(), p9), "incp z27.h, p9");
- COMPARE_PREFIX(incp(z27.VnS(), p9), "incp z27.s, p9");
- COMPARE_PREFIX(incp(z27.VnD(), p9), "incp z27.d, p9");
- COMPARE_PREFIX(sqdecp(x12, p7.VnB(), w12), "sqdecp x12, p7.b, w12");
- COMPARE_PREFIX(sqdecp(x12, p7.VnH(), w12), "sqdecp x12, p7.h, w12");
- COMPARE_PREFIX(sqdecp(x12, p7.VnS(), w12), "sqdecp x12, p7.s, w12");
- COMPARE_PREFIX(sqdecp(x12, p7.VnD(), w12), "sqdecp x12, p7.d, w12");
- COMPARE_PREFIX(sqdecp(x30, p5.VnB()), "sqdecp x30, p5.b");
- COMPARE_PREFIX(sqdecp(x30, p5.VnH()), "sqdecp x30, p5.h");
- COMPARE_PREFIX(sqdecp(x30, p5.VnS()), "sqdecp x30, p5.s");
- COMPARE_PREFIX(sqdecp(x30, p5.VnD()), "sqdecp x30, p5.d");
- COMPARE_PREFIX(sqdecp(z13.VnH(), p1), "sqdecp z13.h, p1");
- COMPARE_PREFIX(sqdecp(z13.VnS(), p1), "sqdecp z13.s, p1");
- COMPARE_PREFIX(sqdecp(z13.VnD(), p1), "sqdecp z13.d, p1");
- COMPARE_PREFIX(sqincp(x26, p5.VnB(), w26), "sqincp x26, p5.b, w26");
- COMPARE_PREFIX(sqincp(x26, p5.VnH(), w26), "sqincp x26, p5.h, w26");
- COMPARE_PREFIX(sqincp(x26, p5.VnS(), w26), "sqincp x26, p5.s, w26");
- COMPARE_PREFIX(sqincp(x26, p5.VnD(), w26), "sqincp x26, p5.d, w26");
- COMPARE_PREFIX(sqincp(x5, p15.VnB()), "sqincp x5, p15.b");
- COMPARE_PREFIX(sqincp(x5, p15.VnH()), "sqincp x5, p15.h");
- COMPARE_PREFIX(sqincp(x5, p15.VnS()), "sqincp x5, p15.s");
- COMPARE_PREFIX(sqincp(x5, p15.VnD()), "sqincp x5, p15.d");
- COMPARE_PREFIX(sqincp(z14.VnH(), p4), "sqincp z14.h, p4");
- COMPARE_PREFIX(sqincp(z14.VnS(), p4), "sqincp z14.s, p4");
- COMPARE_PREFIX(sqincp(z14.VnD(), p4), "sqincp z14.d, p4");
- COMPARE_PREFIX(uqdecp(w3, p13.VnB()), "uqdecp w3, p13.b");
- COMPARE_PREFIX(uqdecp(w3, p13.VnH()), "uqdecp w3, p13.h");
- COMPARE_PREFIX(uqdecp(w3, p13.VnS()), "uqdecp w3, p13.s");
- COMPARE_PREFIX(uqdecp(w3, p13.VnD()), "uqdecp w3, p13.d");
- COMPARE_PREFIX(uqdecp(x19, p0.VnB()), "uqdecp x19, p0.b");
- COMPARE_PREFIX(uqdecp(x19, p0.VnH()), "uqdecp x19, p0.h");
- COMPARE_PREFIX(uqdecp(x19, p0.VnS()), "uqdecp x19, p0.s");
- COMPARE_PREFIX(uqdecp(x19, p0.VnD()), "uqdecp x19, p0.d");
- COMPARE_PREFIX(uqdecp(z15.VnH(), p9), "uqdecp z15.h, p9");
- COMPARE_PREFIX(uqdecp(z15.VnS(), p9), "uqdecp z15.s, p9");
- COMPARE_PREFIX(uqdecp(z15.VnD(), p9), "uqdecp z15.d, p9");
- COMPARE_PREFIX(uqincp(w18, p1.VnB()), "uqincp w18, p1.b");
- COMPARE_PREFIX(uqincp(w18, p1.VnH()), "uqincp w18, p1.h");
- COMPARE_PREFIX(uqincp(w18, p1.VnS()), "uqincp w18, p1.s");
- COMPARE_PREFIX(uqincp(w18, p1.VnD()), "uqincp w18, p1.d");
- COMPARE_PREFIX(uqincp(x17, p15.VnB()), "uqincp x17, p15.b");
- COMPARE_PREFIX(uqincp(x17, p15.VnH()), "uqincp x17, p15.h");
- COMPARE_PREFIX(uqincp(x17, p15.VnS()), "uqincp x17, p15.s");
- COMPARE_PREFIX(uqincp(x17, p15.VnD()), "uqincp x17, p15.d");
- COMPARE_PREFIX(uqincp(z4.VnH(), p3), "uqincp z4.h, p3");
- COMPARE_PREFIX(uqincp(z4.VnS(), p3), "uqincp z4.s, p3");
- COMPARE_PREFIX(uqincp(z4.VnD(), p3), "uqincp z4.d, p3");
+ COMPARE(decp(x17, p0.VnB()), "decp x17, p0.b");
+ COMPARE(decp(x17, p0.VnH()), "decp x17, p0.h");
+ COMPARE(decp(x17, p0.VnS()), "decp x17, p0.s");
+ COMPARE(decp(x17, p0.VnD()), "decp x17, p0.d");
+ COMPARE(decp(z2.VnH(), p11), "decp z2.h, p11");
+ COMPARE(decp(z2.VnS(), p11), "decp z2.s, p11");
+ COMPARE(decp(z2.VnD(), p11), "decp z2.d, p11");
+ COMPARE(incp(x26, p8.VnB()), "incp x26, p8.b");
+ COMPARE(incp(x26, p8.VnH()), "incp x26, p8.h");
+ COMPARE(incp(x26, p8.VnS()), "incp x26, p8.s");
+ COMPARE(incp(x26, p8.VnD()), "incp x26, p8.d");
+ COMPARE(incp(z27.VnH(), p9), "incp z27.h, p9");
+ COMPARE(incp(z27.VnS(), p9), "incp z27.s, p9");
+ COMPARE(incp(z27.VnD(), p9), "incp z27.d, p9");
+ COMPARE(sqdecp(x12, p7.VnB(), w12), "sqdecp x12, p7.b, w12");
+ COMPARE(sqdecp(x12, p7.VnH(), w12), "sqdecp x12, p7.h, w12");
+ COMPARE(sqdecp(x12, p7.VnS(), w12), "sqdecp x12, p7.s, w12");
+ COMPARE(sqdecp(x12, p7.VnD(), w12), "sqdecp x12, p7.d, w12");
+ COMPARE(sqdecp(x30, p5.VnB()), "sqdecp x30, p5.b");
+ COMPARE(sqdecp(x30, p5.VnH()), "sqdecp x30, p5.h");
+ COMPARE(sqdecp(x30, p5.VnS()), "sqdecp x30, p5.s");
+ COMPARE(sqdecp(x30, p5.VnD()), "sqdecp x30, p5.d");
+ COMPARE(sqdecp(z13.VnH(), p1), "sqdecp z13.h, p1");
+ COMPARE(sqdecp(z13.VnS(), p1), "sqdecp z13.s, p1");
+ COMPARE(sqdecp(z13.VnD(), p1), "sqdecp z13.d, p1");
+ COMPARE(sqincp(x26, p5.VnB(), w26), "sqincp x26, p5.b, w26");
+ COMPARE(sqincp(x26, p5.VnH(), w26), "sqincp x26, p5.h, w26");
+ COMPARE(sqincp(x26, p5.VnS(), w26), "sqincp x26, p5.s, w26");
+ COMPARE(sqincp(x26, p5.VnD(), w26), "sqincp x26, p5.d, w26");
+ COMPARE(sqincp(x5, p15.VnB()), "sqincp x5, p15.b");
+ COMPARE(sqincp(x5, p15.VnH()), "sqincp x5, p15.h");
+ COMPARE(sqincp(x5, p15.VnS()), "sqincp x5, p15.s");
+ COMPARE(sqincp(x5, p15.VnD()), "sqincp x5, p15.d");
+ COMPARE(sqincp(z14.VnH(), p4), "sqincp z14.h, p4");
+ COMPARE(sqincp(z14.VnS(), p4), "sqincp z14.s, p4");
+ COMPARE(sqincp(z14.VnD(), p4), "sqincp z14.d, p4");
+ COMPARE(uqdecp(w3, p13.VnB()), "uqdecp w3, p13.b");
+ COMPARE(uqdecp(w3, p13.VnH()), "uqdecp w3, p13.h");
+ COMPARE(uqdecp(w3, p13.VnS()), "uqdecp w3, p13.s");
+ COMPARE(uqdecp(w3, p13.VnD()), "uqdecp w3, p13.d");
+ COMPARE(uqdecp(x19, p0.VnB()), "uqdecp x19, p0.b");
+ COMPARE(uqdecp(x19, p0.VnH()), "uqdecp x19, p0.h");
+ COMPARE(uqdecp(x19, p0.VnS()), "uqdecp x19, p0.s");
+ COMPARE(uqdecp(x19, p0.VnD()), "uqdecp x19, p0.d");
+ COMPARE(uqdecp(z15.VnH(), p9), "uqdecp z15.h, p9");
+ COMPARE(uqdecp(z15.VnS(), p9), "uqdecp z15.s, p9");
+ COMPARE(uqdecp(z15.VnD(), p9), "uqdecp z15.d, p9");
+ COMPARE(uqincp(w18, p1.VnB()), "uqincp w18, p1.b");
+ COMPARE(uqincp(w18, p1.VnH()), "uqincp w18, p1.h");
+ COMPARE(uqincp(w18, p1.VnS()), "uqincp w18, p1.s");
+ COMPARE(uqincp(w18, p1.VnD()), "uqincp w18, p1.d");
+ COMPARE(uqincp(x17, p15.VnB()), "uqincp x17, p15.b");
+ COMPARE(uqincp(x17, p15.VnH()), "uqincp x17, p15.h");
+ COMPARE(uqincp(x17, p15.VnS()), "uqincp x17, p15.s");
+ COMPARE(uqincp(x17, p15.VnD()), "uqincp x17, p15.d");
+ COMPARE(uqincp(z4.VnH(), p3), "uqincp z4.h, p3");
+ COMPARE(uqincp(z4.VnS(), p3), "uqincp z4.s, p3");
+ COMPARE(uqincp(z4.VnD(), p3), "uqincp z4.d, p3");
CLEANUP();
}
@@ -2038,24 +2128,24 @@ TEST(sve_inc_dec_by_predicate_count_macro) {
TEST(sve_index_generation) {
SETUP();
- COMPARE_PREFIX(index(z21.VnB(), -16, 15), "index z21.b, #-16, #15");
- COMPARE_PREFIX(index(z22.VnB(), -2, 1), "index z22.b, #-2, #1");
- COMPARE_PREFIX(index(z23.VnH(), -1, 0), "index z23.h, #-1, #0");
- COMPARE_PREFIX(index(z24.VnS(), 0, -1), "index z24.s, #0, #-1");
- COMPARE_PREFIX(index(z25.VnD(), 1, -2), "index z25.d, #1, #-2");
- COMPARE_PREFIX(index(z26.VnB(), 15, -16), "index z26.b, #15, #-16");
- COMPARE_PREFIX(index(z23.VnB(), -16, w8), "index z23.b, #-16, w8");
- COMPARE_PREFIX(index(z24.VnH(), -1, x9), "index z24.h, #-1, w9");
- COMPARE_PREFIX(index(z25.VnS(), 0, w10), "index z25.s, #0, w10");
- COMPARE_PREFIX(index(z26.VnD(), 15, x11), "index z26.d, #15, x11");
- COMPARE_PREFIX(index(z14.VnB(), w15, 15), "index z14.b, w15, #15");
- COMPARE_PREFIX(index(z15.VnH(), x16, 1), "index z15.h, w16, #1");
- COMPARE_PREFIX(index(z16.VnS(), w17, 0), "index z16.s, w17, #0");
- COMPARE_PREFIX(index(z17.VnD(), x18, -16), "index z17.d, x18, #-16");
- COMPARE_PREFIX(index(z20.VnB(), w23, w21), "index z20.b, w23, w21");
- COMPARE_PREFIX(index(z21.VnH(), x24, w22), "index z21.h, w24, w22");
- COMPARE_PREFIX(index(z22.VnS(), w25, x23), "index z22.s, w25, w23");
- COMPARE_PREFIX(index(z23.VnD(), x26, x24), "index z23.d, x26, x24");
+ COMPARE(index(z21.VnB(), -16, 15), "index z21.b, #-16, #15");
+ COMPARE(index(z22.VnB(), -2, 1), "index z22.b, #-2, #1");
+ COMPARE(index(z23.VnH(), -1, 0), "index z23.h, #-1, #0");
+ COMPARE(index(z24.VnS(), 0, -1), "index z24.s, #0, #-1");
+ COMPARE(index(z25.VnD(), 1, -2), "index z25.d, #1, #-2");
+ COMPARE(index(z26.VnB(), 15, -16), "index z26.b, #15, #-16");
+ COMPARE(index(z23.VnB(), -16, w8), "index z23.b, #-16, w8");
+ COMPARE(index(z24.VnH(), -1, x9), "index z24.h, #-1, w9");
+ COMPARE(index(z25.VnS(), 0, w10), "index z25.s, #0, w10");
+ COMPARE(index(z26.VnD(), 15, x11), "index z26.d, #15, x11");
+ COMPARE(index(z14.VnB(), w15, 15), "index z14.b, w15, #15");
+ COMPARE(index(z15.VnH(), x16, 1), "index z15.h, w16, #1");
+ COMPARE(index(z16.VnS(), w17, 0), "index z16.s, w17, #0");
+ COMPARE(index(z17.VnD(), x18, -16), "index z17.d, x18, #-16");
+ COMPARE(index(z20.VnB(), w23, w21), "index z20.b, w23, w21");
+ COMPARE(index(z21.VnH(), x24, w22), "index z21.h, w24, w22");
+ COMPARE(index(z22.VnS(), w25, x23), "index z22.s, w25, w23");
+ COMPARE(index(z23.VnD(), x26, x24), "index z23.d, x26, x24");
// Simple pass-through macros.
COMPARE_MACRO(Index(z21.VnB(), -16, 15), "index z21.b, #-16, #15");
@@ -2099,50 +2189,30 @@ TEST(sve_index_generation) {
TEST(sve_int_arithmetic_unpredicated) {
SETUP();
- COMPARE_PREFIX(add(z23.VnB(), z30.VnB(), z31.VnB()),
- "add z23.b, z30.b, z31.b");
- COMPARE_PREFIX(add(z24.VnH(), z29.VnH(), z30.VnH()),
- "add z24.h, z29.h, z30.h");
- COMPARE_PREFIX(add(z25.VnS(), z28.VnS(), z29.VnS()),
- "add z25.s, z28.s, z29.s");
- COMPARE_PREFIX(add(z26.VnD(), z27.VnD(), z28.VnD()),
- "add z26.d, z27.d, z28.d");
- COMPARE_PREFIX(sqadd(z26.VnB(), z21.VnB(), z1.VnB()),
- "sqadd z26.b, z21.b, z1.b");
- COMPARE_PREFIX(sqadd(z25.VnH(), z20.VnH(), z2.VnH()),
- "sqadd z25.h, z20.h, z2.h");
- COMPARE_PREFIX(sqadd(z24.VnS(), z19.VnS(), z3.VnS()),
- "sqadd z24.s, z19.s, z3.s");
- COMPARE_PREFIX(sqadd(z23.VnD(), z18.VnD(), z4.VnD()),
- "sqadd z23.d, z18.d, z4.d");
- COMPARE_PREFIX(sqsub(z1.VnB(), z10.VnB(), z0.VnB()),
- "sqsub z1.b, z10.b, z0.b");
- COMPARE_PREFIX(sqsub(z2.VnH(), z11.VnH(), z1.VnH()),
- "sqsub z2.h, z11.h, z1.h");
- COMPARE_PREFIX(sqsub(z3.VnS(), z12.VnS(), z2.VnS()),
- "sqsub z3.s, z12.s, z2.s");
- COMPARE_PREFIX(sqsub(z4.VnD(), z13.VnD(), z3.VnD()),
- "sqsub z4.d, z13.d, z3.d");
- COMPARE_PREFIX(sub(z9.VnB(), z7.VnB(), z25.VnB()), "sub z9.b, z7.b, z25.b");
- COMPARE_PREFIX(sub(z8.VnH(), z8.VnH(), z26.VnH()), "sub z8.h, z8.h, z26.h");
- COMPARE_PREFIX(sub(z7.VnS(), z9.VnS(), z27.VnS()), "sub z7.s, z9.s, z27.s");
- COMPARE_PREFIX(sub(z6.VnD(), z10.VnD(), z28.VnD()), "sub z6.d, z10.d, z28.d");
- COMPARE_PREFIX(uqadd(z13.VnB(), z15.VnB(), z3.VnB()),
- "uqadd z13.b, z15.b, z3.b");
- COMPARE_PREFIX(uqadd(z12.VnH(), z16.VnH(), z2.VnH()),
- "uqadd z12.h, z16.h, z2.h");
- COMPARE_PREFIX(uqadd(z11.VnS(), z17.VnS(), z1.VnS()),
- "uqadd z11.s, z17.s, z1.s");
- COMPARE_PREFIX(uqadd(z10.VnD(), z18.VnD(), z0.VnD()),
- "uqadd z10.d, z18.d, z0.d");
- COMPARE_PREFIX(uqsub(z9.VnB(), z13.VnB(), z13.VnB()),
- "uqsub z9.b, z13.b, z13.b");
- COMPARE_PREFIX(uqsub(z11.VnH(), z15.VnH(), z11.VnH()),
- "uqsub z11.h, z15.h, z11.h");
- COMPARE_PREFIX(uqsub(z13.VnS(), z17.VnS(), z13.VnS()),
- "uqsub z13.s, z17.s, z13.s");
- COMPARE_PREFIX(uqsub(z15.VnD(), z19.VnD(), z15.VnD()),
- "uqsub z15.d, z19.d, z15.d");
+ COMPARE(add(z23.VnB(), z30.VnB(), z31.VnB()), "add z23.b, z30.b, z31.b");
+ COMPARE(add(z24.VnH(), z29.VnH(), z30.VnH()), "add z24.h, z29.h, z30.h");
+ COMPARE(add(z25.VnS(), z28.VnS(), z29.VnS()), "add z25.s, z28.s, z29.s");
+ COMPARE(add(z26.VnD(), z27.VnD(), z28.VnD()), "add z26.d, z27.d, z28.d");
+ COMPARE(sqadd(z26.VnB(), z21.VnB(), z1.VnB()), "sqadd z26.b, z21.b, z1.b");
+ COMPARE(sqadd(z25.VnH(), z20.VnH(), z2.VnH()), "sqadd z25.h, z20.h, z2.h");
+ COMPARE(sqadd(z24.VnS(), z19.VnS(), z3.VnS()), "sqadd z24.s, z19.s, z3.s");
+ COMPARE(sqadd(z23.VnD(), z18.VnD(), z4.VnD()), "sqadd z23.d, z18.d, z4.d");
+ COMPARE(sqsub(z1.VnB(), z10.VnB(), z0.VnB()), "sqsub z1.b, z10.b, z0.b");
+ COMPARE(sqsub(z2.VnH(), z11.VnH(), z1.VnH()), "sqsub z2.h, z11.h, z1.h");
+ COMPARE(sqsub(z3.VnS(), z12.VnS(), z2.VnS()), "sqsub z3.s, z12.s, z2.s");
+ COMPARE(sqsub(z4.VnD(), z13.VnD(), z3.VnD()), "sqsub z4.d, z13.d, z3.d");
+ COMPARE(sub(z9.VnB(), z7.VnB(), z25.VnB()), "sub z9.b, z7.b, z25.b");
+ COMPARE(sub(z8.VnH(), z8.VnH(), z26.VnH()), "sub z8.h, z8.h, z26.h");
+ COMPARE(sub(z7.VnS(), z9.VnS(), z27.VnS()), "sub z7.s, z9.s, z27.s");
+ COMPARE(sub(z6.VnD(), z10.VnD(), z28.VnD()), "sub z6.d, z10.d, z28.d");
+ COMPARE(uqadd(z13.VnB(), z15.VnB(), z3.VnB()), "uqadd z13.b, z15.b, z3.b");
+ COMPARE(uqadd(z12.VnH(), z16.VnH(), z2.VnH()), "uqadd z12.h, z16.h, z2.h");
+ COMPARE(uqadd(z11.VnS(), z17.VnS(), z1.VnS()), "uqadd z11.s, z17.s, z1.s");
+ COMPARE(uqadd(z10.VnD(), z18.VnD(), z0.VnD()), "uqadd z10.d, z18.d, z0.d");
+ COMPARE(uqsub(z9.VnB(), z13.VnB(), z13.VnB()), "uqsub z9.b, z13.b, z13.b");
+ COMPARE(uqsub(z11.VnH(), z15.VnH(), z11.VnH()), "uqsub z11.h, z15.h, z11.h");
+ COMPARE(uqsub(z13.VnS(), z17.VnS(), z13.VnS()), "uqsub z13.s, z17.s, z13.s");
+ COMPARE(uqsub(z15.VnD(), z19.VnD(), z15.VnD()), "uqsub z15.d, z19.d, z15.d");
CLEANUP();
}
@@ -2150,142 +2220,142 @@ TEST(sve_int_arithmetic_unpredicated) {
TEST(sve_int_binary_arithmetic_predicated) {
SETUP();
- COMPARE_PREFIX(add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()),
- "add z22.b, p4/m, z22.b, z20.b");
- COMPARE_PREFIX(add(z22.VnH(), p4.Merging(), z22.VnH(), z20.VnH()),
- "add z22.h, p4/m, z22.h, z20.h");
- COMPARE_PREFIX(add(z22.VnS(), p4.Merging(), z22.VnS(), z20.VnS()),
- "add z22.s, p4/m, z22.s, z20.s");
- COMPARE_PREFIX(add(z22.VnD(), p4.Merging(), z22.VnD(), z20.VnD()),
- "add z22.d, p4/m, z22.d, z20.d");
- COMPARE_PREFIX(and_(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()),
- "and z22.b, p3/m, z22.b, z3.b");
- COMPARE_PREFIX(and_(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()),
- "and z22.h, p3/m, z22.h, z3.h");
- COMPARE_PREFIX(and_(z22.VnS(), p3.Merging(), z22.VnS(), z3.VnS()),
- "and z22.s, p3/m, z22.s, z3.s");
- COMPARE_PREFIX(and_(z22.VnD(), p3.Merging(), z22.VnD(), z3.VnD()),
- "and z22.d, p3/m, z22.d, z3.d");
- COMPARE_PREFIX(bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()),
- "bic z17.b, p7/m, z17.b, z10.b");
- COMPARE_PREFIX(bic(z17.VnH(), p7.Merging(), z17.VnH(), z10.VnH()),
- "bic z17.h, p7/m, z17.h, z10.h");
- COMPARE_PREFIX(bic(z17.VnS(), p7.Merging(), z17.VnS(), z10.VnS()),
- "bic z17.s, p7/m, z17.s, z10.s");
- COMPARE_PREFIX(bic(z17.VnD(), p7.Merging(), z17.VnD(), z10.VnD()),
- "bic z17.d, p7/m, z17.d, z10.d");
- COMPARE_PREFIX(eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()),
- "eor z23.b, p4/m, z23.b, z15.b");
- COMPARE_PREFIX(eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()),
- "eor z23.h, p4/m, z23.h, z15.h");
- COMPARE_PREFIX(eor(z23.VnS(), p4.Merging(), z23.VnS(), z15.VnS()),
- "eor z23.s, p4/m, z23.s, z15.s");
- COMPARE_PREFIX(eor(z23.VnD(), p4.Merging(), z23.VnD(), z15.VnD()),
- "eor z23.d, p4/m, z23.d, z15.d");
- COMPARE_PREFIX(mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()),
- "mul z15.b, p5/m, z15.b, z15.b");
- COMPARE_PREFIX(mul(z15.VnH(), p5.Merging(), z15.VnH(), z15.VnH()),
- "mul z15.h, p5/m, z15.h, z15.h");
- COMPARE_PREFIX(mul(z15.VnS(), p5.Merging(), z15.VnS(), z15.VnS()),
- "mul z15.s, p5/m, z15.s, z15.s");
- COMPARE_PREFIX(mul(z15.VnD(), p5.Merging(), z15.VnD(), z15.VnD()),
- "mul z15.d, p5/m, z15.d, z15.d");
- COMPARE_PREFIX(orr(z9.VnB(), p1.Merging(), z9.VnB(), z28.VnB()),
- "orr z9.b, p1/m, z9.b, z28.b");
- COMPARE_PREFIX(orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()),
- "orr z9.h, p1/m, z9.h, z28.h");
- COMPARE_PREFIX(orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()),
- "orr z9.s, p1/m, z9.s, z28.s");
- COMPARE_PREFIX(orr(z9.VnD(), p1.Merging(), z9.VnD(), z28.VnD()),
- "orr z9.d, p1/m, z9.d, z28.d");
- COMPARE_PREFIX(sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()),
- "sabd z11.b, p6/m, z11.b, z31.b");
- COMPARE_PREFIX(sabd(z11.VnH(), p6.Merging(), z11.VnH(), z31.VnH()),
- "sabd z11.h, p6/m, z11.h, z31.h");
- COMPARE_PREFIX(sabd(z11.VnS(), p6.Merging(), z11.VnS(), z31.VnS()),
- "sabd z11.s, p6/m, z11.s, z31.s");
- COMPARE_PREFIX(sabd(z11.VnD(), p6.Merging(), z11.VnD(), z31.VnD()),
- "sabd z11.d, p6/m, z11.d, z31.d");
- COMPARE_PREFIX(sdivr(z20.VnS(), p5.Merging(), z20.VnS(), z23.VnS()),
- "sdivr z20.s, p5/m, z20.s, z23.s");
- COMPARE_PREFIX(sdiv(z15.VnD(), p6.Merging(), z15.VnD(), z8.VnD()),
- "sdiv z15.d, p6/m, z15.d, z8.d");
- COMPARE_PREFIX(smax(z30.VnB(), p4.Merging(), z30.VnB(), z30.VnB()),
- "smax z30.b, p4/m, z30.b, z30.b");
- COMPARE_PREFIX(smax(z30.VnH(), p4.Merging(), z30.VnH(), z30.VnH()),
- "smax z30.h, p4/m, z30.h, z30.h");
- COMPARE_PREFIX(smax(z30.VnS(), p4.Merging(), z30.VnS(), z30.VnS()),
- "smax z30.s, p4/m, z30.s, z30.s");
- COMPARE_PREFIX(smax(z30.VnD(), p4.Merging(), z30.VnD(), z30.VnD()),
- "smax z30.d, p4/m, z30.d, z30.d");
- COMPARE_PREFIX(smin(z20.VnB(), p7.Merging(), z20.VnB(), z19.VnB()),
- "smin z20.b, p7/m, z20.b, z19.b");
- COMPARE_PREFIX(smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()),
- "smin z20.h, p7/m, z20.h, z19.h");
- COMPARE_PREFIX(smin(z20.VnS(), p7.Merging(), z20.VnS(), z19.VnS()),
- "smin z20.s, p7/m, z20.s, z19.s");
- COMPARE_PREFIX(smin(z20.VnD(), p7.Merging(), z20.VnD(), z19.VnD()),
- "smin z20.d, p7/m, z20.d, z19.d");
- COMPARE_PREFIX(smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()),
- "smulh z23.b, p0/m, z23.b, z3.b");
- COMPARE_PREFIX(smulh(z23.VnH(), p0.Merging(), z23.VnH(), z3.VnH()),
- "smulh z23.h, p0/m, z23.h, z3.h");
- COMPARE_PREFIX(smulh(z23.VnS(), p0.Merging(), z23.VnS(), z3.VnS()),
- "smulh z23.s, p0/m, z23.s, z3.s");
- COMPARE_PREFIX(smulh(z23.VnD(), p0.Merging(), z23.VnD(), z3.VnD()),
- "smulh z23.d, p0/m, z23.d, z3.d");
- COMPARE_PREFIX(subr(z1.VnB(), p6.Merging(), z1.VnB(), z1.VnB()),
- "subr z1.b, p6/m, z1.b, z1.b");
- COMPARE_PREFIX(subr(z1.VnH(), p6.Merging(), z1.VnH(), z1.VnH()),
- "subr z1.h, p6/m, z1.h, z1.h");
- COMPARE_PREFIX(subr(z1.VnS(), p6.Merging(), z1.VnS(), z1.VnS()),
- "subr z1.s, p6/m, z1.s, z1.s");
- COMPARE_PREFIX(subr(z1.VnD(), p6.Merging(), z1.VnD(), z1.VnD()),
- "subr z1.d, p6/m, z1.d, z1.d");
- COMPARE_PREFIX(sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()),
- "sub z28.b, p2/m, z28.b, z0.b");
- COMPARE_PREFIX(sub(z28.VnH(), p2.Merging(), z28.VnH(), z0.VnH()),
- "sub z28.h, p2/m, z28.h, z0.h");
- COMPARE_PREFIX(sub(z28.VnS(), p2.Merging(), z28.VnS(), z0.VnS()),
- "sub z28.s, p2/m, z28.s, z0.s");
- COMPARE_PREFIX(sub(z28.VnD(), p2.Merging(), z28.VnD(), z0.VnD()),
- "sub z28.d, p2/m, z28.d, z0.d");
- COMPARE_PREFIX(uabd(z14.VnB(), p6.Merging(), z14.VnB(), z22.VnB()),
- "uabd z14.b, p6/m, z14.b, z22.b");
- COMPARE_PREFIX(uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()),
- "uabd z14.h, p6/m, z14.h, z22.h");
- COMPARE_PREFIX(uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()),
- "uabd z14.s, p6/m, z14.s, z22.s");
- COMPARE_PREFIX(uabd(z14.VnD(), p6.Merging(), z14.VnD(), z22.VnD()),
- "uabd z14.d, p6/m, z14.d, z22.d");
- COMPARE_PREFIX(udivr(z27.VnS(), p5.Merging(), z27.VnS(), z31.VnS()),
- "udivr z27.s, p5/m, z27.s, z31.s");
- COMPARE_PREFIX(udiv(z13.VnD(), p4.Merging(), z13.VnD(), z11.VnD()),
- "udiv z13.d, p4/m, z13.d, z11.d");
- COMPARE_PREFIX(umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()),
- "umax z0.b, p5/m, z0.b, z14.b");
- COMPARE_PREFIX(umax(z0.VnH(), p5.Merging(), z0.VnH(), z14.VnH()),
- "umax z0.h, p5/m, z0.h, z14.h");
- COMPARE_PREFIX(umax(z0.VnS(), p5.Merging(), z0.VnS(), z14.VnS()),
- "umax z0.s, p5/m, z0.s, z14.s");
- COMPARE_PREFIX(umax(z0.VnD(), p5.Merging(), z0.VnD(), z14.VnD()),
- "umax z0.d, p5/m, z0.d, z14.d");
- COMPARE_PREFIX(umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()),
- "umin z26.b, p5/m, z26.b, z12.b");
- COMPARE_PREFIX(umin(z26.VnH(), p5.Merging(), z26.VnH(), z12.VnH()),
- "umin z26.h, p5/m, z26.h, z12.h");
- COMPARE_PREFIX(umin(z26.VnS(), p5.Merging(), z26.VnS(), z12.VnS()),
- "umin z26.s, p5/m, z26.s, z12.s");
- COMPARE_PREFIX(umin(z26.VnD(), p5.Merging(), z26.VnD(), z12.VnD()),
- "umin z26.d, p5/m, z26.d, z12.d");
- COMPARE_PREFIX(umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()),
- "umulh z12.b, p2/m, z12.b, z17.b");
- COMPARE_PREFIX(umulh(z12.VnH(), p2.Merging(), z12.VnH(), z17.VnH()),
- "umulh z12.h, p2/m, z12.h, z17.h");
- COMPARE_PREFIX(umulh(z12.VnS(), p2.Merging(), z12.VnS(), z17.VnS()),
- "umulh z12.s, p2/m, z12.s, z17.s");
- COMPARE_PREFIX(umulh(z12.VnD(), p2.Merging(), z12.VnD(), z17.VnD()),
- "umulh z12.d, p2/m, z12.d, z17.d");
+ COMPARE(add(z22.VnB(), p4.Merging(), z22.VnB(), z20.VnB()),
+ "add z22.b, p4/m, z22.b, z20.b");
+ COMPARE(add(z22.VnH(), p4.Merging(), z22.VnH(), z20.VnH()),
+ "add z22.h, p4/m, z22.h, z20.h");
+ COMPARE(add(z22.VnS(), p4.Merging(), z22.VnS(), z20.VnS()),
+ "add z22.s, p4/m, z22.s, z20.s");
+ COMPARE(add(z22.VnD(), p4.Merging(), z22.VnD(), z20.VnD()),
+ "add z22.d, p4/m, z22.d, z20.d");
+ COMPARE(and_(z22.VnB(), p3.Merging(), z22.VnB(), z3.VnB()),
+ "and z22.b, p3/m, z22.b, z3.b");
+ COMPARE(and_(z22.VnH(), p3.Merging(), z22.VnH(), z3.VnH()),
+ "and z22.h, p3/m, z22.h, z3.h");
+ COMPARE(and_(z22.VnS(), p3.Merging(), z22.VnS(), z3.VnS()),
+ "and z22.s, p3/m, z22.s, z3.s");
+ COMPARE(and_(z22.VnD(), p3.Merging(), z22.VnD(), z3.VnD()),
+ "and z22.d, p3/m, z22.d, z3.d");
+ COMPARE(bic(z17.VnB(), p7.Merging(), z17.VnB(), z10.VnB()),
+ "bic z17.b, p7/m, z17.b, z10.b");
+ COMPARE(bic(z17.VnH(), p7.Merging(), z17.VnH(), z10.VnH()),
+ "bic z17.h, p7/m, z17.h, z10.h");
+ COMPARE(bic(z17.VnS(), p7.Merging(), z17.VnS(), z10.VnS()),
+ "bic z17.s, p7/m, z17.s, z10.s");
+ COMPARE(bic(z17.VnD(), p7.Merging(), z17.VnD(), z10.VnD()),
+ "bic z17.d, p7/m, z17.d, z10.d");
+ COMPARE(eor(z23.VnB(), p4.Merging(), z23.VnB(), z15.VnB()),
+ "eor z23.b, p4/m, z23.b, z15.b");
+ COMPARE(eor(z23.VnH(), p4.Merging(), z23.VnH(), z15.VnH()),
+ "eor z23.h, p4/m, z23.h, z15.h");
+ COMPARE(eor(z23.VnS(), p4.Merging(), z23.VnS(), z15.VnS()),
+ "eor z23.s, p4/m, z23.s, z15.s");
+ COMPARE(eor(z23.VnD(), p4.Merging(), z23.VnD(), z15.VnD()),
+ "eor z23.d, p4/m, z23.d, z15.d");
+ COMPARE(mul(z15.VnB(), p5.Merging(), z15.VnB(), z15.VnB()),
+ "mul z15.b, p5/m, z15.b, z15.b");
+ COMPARE(mul(z15.VnH(), p5.Merging(), z15.VnH(), z15.VnH()),
+ "mul z15.h, p5/m, z15.h, z15.h");
+ COMPARE(mul(z15.VnS(), p5.Merging(), z15.VnS(), z15.VnS()),
+ "mul z15.s, p5/m, z15.s, z15.s");
+ COMPARE(mul(z15.VnD(), p5.Merging(), z15.VnD(), z15.VnD()),
+ "mul z15.d, p5/m, z15.d, z15.d");
+ COMPARE(orr(z9.VnB(), p1.Merging(), z9.VnB(), z28.VnB()),
+ "orr z9.b, p1/m, z9.b, z28.b");
+ COMPARE(orr(z9.VnH(), p1.Merging(), z9.VnH(), z28.VnH()),
+ "orr z9.h, p1/m, z9.h, z28.h");
+ COMPARE(orr(z9.VnS(), p1.Merging(), z9.VnS(), z28.VnS()),
+ "orr z9.s, p1/m, z9.s, z28.s");
+ COMPARE(orr(z9.VnD(), p1.Merging(), z9.VnD(), z28.VnD()),
+ "orr z9.d, p1/m, z9.d, z28.d");
+ COMPARE(sabd(z11.VnB(), p6.Merging(), z11.VnB(), z31.VnB()),
+ "sabd z11.b, p6/m, z11.b, z31.b");
+ COMPARE(sabd(z11.VnH(), p6.Merging(), z11.VnH(), z31.VnH()),
+ "sabd z11.h, p6/m, z11.h, z31.h");
+ COMPARE(sabd(z11.VnS(), p6.Merging(), z11.VnS(), z31.VnS()),
+ "sabd z11.s, p6/m, z11.s, z31.s");
+ COMPARE(sabd(z11.VnD(), p6.Merging(), z11.VnD(), z31.VnD()),
+ "sabd z11.d, p6/m, z11.d, z31.d");
+ COMPARE(sdivr(z20.VnS(), p5.Merging(), z20.VnS(), z23.VnS()),
+ "sdivr z20.s, p5/m, z20.s, z23.s");
+ COMPARE(sdiv(z15.VnD(), p6.Merging(), z15.VnD(), z8.VnD()),
+ "sdiv z15.d, p6/m, z15.d, z8.d");
+ COMPARE(smax(z30.VnB(), p4.Merging(), z30.VnB(), z30.VnB()),
+ "smax z30.b, p4/m, z30.b, z30.b");
+ COMPARE(smax(z30.VnH(), p4.Merging(), z30.VnH(), z30.VnH()),
+ "smax z30.h, p4/m, z30.h, z30.h");
+ COMPARE(smax(z30.VnS(), p4.Merging(), z30.VnS(), z30.VnS()),
+ "smax z30.s, p4/m, z30.s, z30.s");
+ COMPARE(smax(z30.VnD(), p4.Merging(), z30.VnD(), z30.VnD()),
+ "smax z30.d, p4/m, z30.d, z30.d");
+ COMPARE(smin(z20.VnB(), p7.Merging(), z20.VnB(), z19.VnB()),
+ "smin z20.b, p7/m, z20.b, z19.b");
+ COMPARE(smin(z20.VnH(), p7.Merging(), z20.VnH(), z19.VnH()),
+ "smin z20.h, p7/m, z20.h, z19.h");
+ COMPARE(smin(z20.VnS(), p7.Merging(), z20.VnS(), z19.VnS()),
+ "smin z20.s, p7/m, z20.s, z19.s");
+ COMPARE(smin(z20.VnD(), p7.Merging(), z20.VnD(), z19.VnD()),
+ "smin z20.d, p7/m, z20.d, z19.d");
+ COMPARE(smulh(z23.VnB(), p0.Merging(), z23.VnB(), z3.VnB()),
+ "smulh z23.b, p0/m, z23.b, z3.b");
+ COMPARE(smulh(z23.VnH(), p0.Merging(), z23.VnH(), z3.VnH()),
+ "smulh z23.h, p0/m, z23.h, z3.h");
+ COMPARE(smulh(z23.VnS(), p0.Merging(), z23.VnS(), z3.VnS()),
+ "smulh z23.s, p0/m, z23.s, z3.s");
+ COMPARE(smulh(z23.VnD(), p0.Merging(), z23.VnD(), z3.VnD()),
+ "smulh z23.d, p0/m, z23.d, z3.d");
+ COMPARE(subr(z1.VnB(), p6.Merging(), z1.VnB(), z1.VnB()),
+ "subr z1.b, p6/m, z1.b, z1.b");
+ COMPARE(subr(z1.VnH(), p6.Merging(), z1.VnH(), z1.VnH()),
+ "subr z1.h, p6/m, z1.h, z1.h");
+ COMPARE(subr(z1.VnS(), p6.Merging(), z1.VnS(), z1.VnS()),
+ "subr z1.s, p6/m, z1.s, z1.s");
+ COMPARE(subr(z1.VnD(), p6.Merging(), z1.VnD(), z1.VnD()),
+ "subr z1.d, p6/m, z1.d, z1.d");
+ COMPARE(sub(z28.VnB(), p2.Merging(), z28.VnB(), z0.VnB()),
+ "sub z28.b, p2/m, z28.b, z0.b");
+ COMPARE(sub(z28.VnH(), p2.Merging(), z28.VnH(), z0.VnH()),
+ "sub z28.h, p2/m, z28.h, z0.h");
+ COMPARE(sub(z28.VnS(), p2.Merging(), z28.VnS(), z0.VnS()),
+ "sub z28.s, p2/m, z28.s, z0.s");
+ COMPARE(sub(z28.VnD(), p2.Merging(), z28.VnD(), z0.VnD()),
+ "sub z28.d, p2/m, z28.d, z0.d");
+ COMPARE(uabd(z14.VnB(), p6.Merging(), z14.VnB(), z22.VnB()),
+ "uabd z14.b, p6/m, z14.b, z22.b");
+ COMPARE(uabd(z14.VnH(), p6.Merging(), z14.VnH(), z22.VnH()),
+ "uabd z14.h, p6/m, z14.h, z22.h");
+ COMPARE(uabd(z14.VnS(), p6.Merging(), z14.VnS(), z22.VnS()),
+ "uabd z14.s, p6/m, z14.s, z22.s");
+ COMPARE(uabd(z14.VnD(), p6.Merging(), z14.VnD(), z22.VnD()),
+ "uabd z14.d, p6/m, z14.d, z22.d");
+ COMPARE(udivr(z27.VnS(), p5.Merging(), z27.VnS(), z31.VnS()),
+ "udivr z27.s, p5/m, z27.s, z31.s");
+ COMPARE(udiv(z13.VnD(), p4.Merging(), z13.VnD(), z11.VnD()),
+ "udiv z13.d, p4/m, z13.d, z11.d");
+ COMPARE(umax(z0.VnB(), p5.Merging(), z0.VnB(), z14.VnB()),
+ "umax z0.b, p5/m, z0.b, z14.b");
+ COMPARE(umax(z0.VnH(), p5.Merging(), z0.VnH(), z14.VnH()),
+ "umax z0.h, p5/m, z0.h, z14.h");
+ COMPARE(umax(z0.VnS(), p5.Merging(), z0.VnS(), z14.VnS()),
+ "umax z0.s, p5/m, z0.s, z14.s");
+ COMPARE(umax(z0.VnD(), p5.Merging(), z0.VnD(), z14.VnD()),
+ "umax z0.d, p5/m, z0.d, z14.d");
+ COMPARE(umin(z26.VnB(), p5.Merging(), z26.VnB(), z12.VnB()),
+ "umin z26.b, p5/m, z26.b, z12.b");
+ COMPARE(umin(z26.VnH(), p5.Merging(), z26.VnH(), z12.VnH()),
+ "umin z26.h, p5/m, z26.h, z12.h");
+ COMPARE(umin(z26.VnS(), p5.Merging(), z26.VnS(), z12.VnS()),
+ "umin z26.s, p5/m, z26.s, z12.s");
+ COMPARE(umin(z26.VnD(), p5.Merging(), z26.VnD(), z12.VnD()),
+ "umin z26.d, p5/m, z26.d, z12.d");
+ COMPARE(umulh(z12.VnB(), p2.Merging(), z12.VnB(), z17.VnB()),
+ "umulh z12.b, p2/m, z12.b, z17.b");
+ COMPARE(umulh(z12.VnH(), p2.Merging(), z12.VnH(), z17.VnH()),
+ "umulh z12.h, p2/m, z12.h, z17.h");
+ COMPARE(umulh(z12.VnS(), p2.Merging(), z12.VnS(), z17.VnS()),
+ "umulh z12.s, p2/m, z12.s, z17.s");
+ COMPARE(umulh(z12.VnD(), p2.Merging(), z12.VnD(), z17.VnD()),
+ "umulh z12.d, p2/m, z12.d, z17.d");
CLEANUP();
}
@@ -2430,25 +2500,25 @@ TEST(sve_int_binary_arithmetic_predicated_macro) {
TEST(sve_int_compare_scalars) {
SETUP();
- COMPARE_PREFIX(ctermeq(w30, w26), "ctermeq w30, w26");
- COMPARE_PREFIX(ctermne(x21, x18), "ctermne x21, x18");
- COMPARE_PREFIX(whilele(p10.VnB(), x11, x6), "whilele p10.b, x11, x6");
- COMPARE_PREFIX(whilele(p10.VnH(), w11, w6), "whilele p10.h, w11, w6");
- COMPARE_PREFIX(whilele(p10.VnH(), x11, x6), "whilele p10.h, x11, x6");
- COMPARE_PREFIX(whilele(p10.VnS(), w11, w6), "whilele p10.s, w11, w6");
- COMPARE_PREFIX(whilele(p10.VnD(), x11, x6), "whilele p10.d, x11, x6");
- COMPARE_PREFIX(whilelo(p4.VnB(), w3, w25), "whilelo p4.b, w3, w25");
- COMPARE_PREFIX(whilelo(p4.VnH(), x3, x25), "whilelo p4.h, x3, x25");
- COMPARE_PREFIX(whilelo(p4.VnS(), w3, w25), "whilelo p4.s, w3, w25");
- COMPARE_PREFIX(whilelo(p4.VnD(), x3, x25), "whilelo p4.d, x3, x25");
- COMPARE_PREFIX(whilels(p7.VnB(), w15, w15), "whilels p7.b, w15, w15");
- COMPARE_PREFIX(whilels(p7.VnH(), x15, x15), "whilels p7.h, x15, x15");
- COMPARE_PREFIX(whilels(p7.VnS(), w15, w15), "whilels p7.s, w15, w15");
- COMPARE_PREFIX(whilels(p7.VnD(), x15, x15), "whilels p7.d, x15, x15");
- COMPARE_PREFIX(whilelt(p14.VnB(), w11, w14), "whilelt p14.b, w11, w14");
- COMPARE_PREFIX(whilelt(p14.VnH(), x11, x14), "whilelt p14.h, x11, x14");
- COMPARE_PREFIX(whilelt(p14.VnS(), w11, w14), "whilelt p14.s, w11, w14");
- COMPARE_PREFIX(whilelt(p14.VnD(), x11, x14), "whilelt p14.d, x11, x14");
+ COMPARE(ctermeq(w30, w26), "ctermeq w30, w26");
+ COMPARE(ctermne(x21, x18), "ctermne x21, x18");
+ COMPARE(whilele(p10.VnB(), x11, x6), "whilele p10.b, x11, x6");
+ COMPARE(whilele(p10.VnH(), w11, w6), "whilele p10.h, w11, w6");
+ COMPARE(whilele(p10.VnH(), x11, x6), "whilele p10.h, x11, x6");
+ COMPARE(whilele(p10.VnS(), w11, w6), "whilele p10.s, w11, w6");
+ COMPARE(whilele(p10.VnD(), x11, x6), "whilele p10.d, x11, x6");
+ COMPARE(whilelo(p4.VnB(), w3, w25), "whilelo p4.b, w3, w25");
+ COMPARE(whilelo(p4.VnH(), x3, x25), "whilelo p4.h, x3, x25");
+ COMPARE(whilelo(p4.VnS(), w3, w25), "whilelo p4.s, w3, w25");
+ COMPARE(whilelo(p4.VnD(), x3, x25), "whilelo p4.d, x3, x25");
+ COMPARE(whilels(p7.VnB(), w15, w15), "whilels p7.b, w15, w15");
+ COMPARE(whilels(p7.VnH(), x15, x15), "whilels p7.h, x15, x15");
+ COMPARE(whilels(p7.VnS(), w15, w15), "whilels p7.s, w15, w15");
+ COMPARE(whilels(p7.VnD(), x15, x15), "whilels p7.d, x15, x15");
+ COMPARE(whilelt(p14.VnB(), w11, w14), "whilelt p14.b, w11, w14");
+ COMPARE(whilelt(p14.VnH(), x11, x14), "whilelt p14.h, x11, x14");
+ COMPARE(whilelt(p14.VnS(), w11, w14), "whilelt p14.s, w11, w14");
+ COMPARE(whilelt(p14.VnD(), x11, x14), "whilelt p14.d, x11, x14");
CLEANUP();
}
@@ -2456,54 +2526,54 @@ TEST(sve_int_compare_scalars) {
TEST(sve_int_compare_signed_imm) {
SETUP();
- COMPARE_PREFIX(cmpeq(p0.VnB(), p3.Zeroing(), z1.VnB(), 15),
- "cmpeq p0.b, p3/z, z1.b, #15");
- COMPARE_PREFIX(cmpeq(p0.VnH(), p3.Zeroing(), z1.VnH(), 7),
- "cmpeq p0.h, p3/z, z1.h, #7");
- COMPARE_PREFIX(cmpeq(p0.VnS(), p3.Zeroing(), z1.VnS(), -3),
- "cmpeq p0.s, p3/z, z1.s, #-3");
- COMPARE_PREFIX(cmpeq(p0.VnD(), p3.Zeroing(), z1.VnD(), -14),
- "cmpeq p0.d, p3/z, z1.d, #-14");
- COMPARE_PREFIX(cmpge(p9.VnB(), p6.Zeroing(), z12.VnB(), 14),
- "cmpge p9.b, p6/z, z12.b, #14");
- COMPARE_PREFIX(cmpge(p9.VnH(), p6.Zeroing(), z12.VnH(), 6),
- "cmpge p9.h, p6/z, z12.h, #6");
- COMPARE_PREFIX(cmpge(p9.VnS(), p6.Zeroing(), z12.VnS(), -4),
- "cmpge p9.s, p6/z, z12.s, #-4");
- COMPARE_PREFIX(cmpge(p9.VnD(), p6.Zeroing(), z12.VnD(), -13),
- "cmpge p9.d, p6/z, z12.d, #-13");
- COMPARE_PREFIX(cmpgt(p15.VnB(), p4.Zeroing(), z23.VnB(), 13),
- "cmpgt p15.b, p4/z, z23.b, #13");
- COMPARE_PREFIX(cmpgt(p15.VnH(), p4.Zeroing(), z23.VnH(), 5),
- "cmpgt p15.h, p4/z, z23.h, #5");
- COMPARE_PREFIX(cmpgt(p15.VnS(), p4.Zeroing(), z23.VnS(), -12),
- "cmpgt p15.s, p4/z, z23.s, #-12");
- COMPARE_PREFIX(cmpgt(p15.VnD(), p4.Zeroing(), z23.VnD(), -5),
- "cmpgt p15.d, p4/z, z23.d, #-5");
- COMPARE_PREFIX(cmple(p4.VnB(), p3.Zeroing(), z5.VnB(), 12),
- "cmple p4.b, p3/z, z5.b, #12");
- COMPARE_PREFIX(cmple(p4.VnH(), p3.Zeroing(), z5.VnH(), 4),
- "cmple p4.h, p3/z, z5.h, #4");
- COMPARE_PREFIX(cmple(p4.VnS(), p3.Zeroing(), z5.VnS(), -11),
- "cmple p4.s, p3/z, z5.s, #-11");
- COMPARE_PREFIX(cmple(p4.VnD(), p3.Zeroing(), z5.VnD(), -6),
- "cmple p4.d, p3/z, z5.d, #-6");
- COMPARE_PREFIX(cmplt(p3.VnB(), p7.Zeroing(), z15.VnB(), 11),
- "cmplt p3.b, p7/z, z15.b, #11");
- COMPARE_PREFIX(cmplt(p3.VnH(), p7.Zeroing(), z15.VnH(), 3),
- "cmplt p3.h, p7/z, z15.h, #3");
- COMPARE_PREFIX(cmplt(p3.VnS(), p7.Zeroing(), z15.VnS(), -10),
- "cmplt p3.s, p7/z, z15.s, #-10");
- COMPARE_PREFIX(cmplt(p3.VnD(), p7.Zeroing(), z15.VnD(), -7),
- "cmplt p3.d, p7/z, z15.d, #-7");
- COMPARE_PREFIX(cmpne(p13.VnB(), p5.Zeroing(), z20.VnB(), 10),
- "cmpne p13.b, p5/z, z20.b, #10");
- COMPARE_PREFIX(cmpne(p13.VnH(), p5.Zeroing(), z20.VnH(), 2),
- "cmpne p13.h, p5/z, z20.h, #2");
- COMPARE_PREFIX(cmpne(p13.VnS(), p5.Zeroing(), z20.VnS(), -9),
- "cmpne p13.s, p5/z, z20.s, #-9");
- COMPARE_PREFIX(cmpne(p13.VnD(), p5.Zeroing(), z20.VnD(), -8),
- "cmpne p13.d, p5/z, z20.d, #-8");
+ COMPARE(cmpeq(p0.VnB(), p3.Zeroing(), z1.VnB(), 15),
+ "cmpeq p0.b, p3/z, z1.b, #15");
+ COMPARE(cmpeq(p0.VnH(), p3.Zeroing(), z1.VnH(), 7),
+ "cmpeq p0.h, p3/z, z1.h, #7");
+ COMPARE(cmpeq(p0.VnS(), p3.Zeroing(), z1.VnS(), -3),
+ "cmpeq p0.s, p3/z, z1.s, #-3");
+ COMPARE(cmpeq(p0.VnD(), p3.Zeroing(), z1.VnD(), -14),
+ "cmpeq p0.d, p3/z, z1.d, #-14");
+ COMPARE(cmpge(p9.VnB(), p6.Zeroing(), z12.VnB(), 14),
+ "cmpge p9.b, p6/z, z12.b, #14");
+ COMPARE(cmpge(p9.VnH(), p6.Zeroing(), z12.VnH(), 6),
+ "cmpge p9.h, p6/z, z12.h, #6");
+ COMPARE(cmpge(p9.VnS(), p6.Zeroing(), z12.VnS(), -4),
+ "cmpge p9.s, p6/z, z12.s, #-4");
+ COMPARE(cmpge(p9.VnD(), p6.Zeroing(), z12.VnD(), -13),
+ "cmpge p9.d, p6/z, z12.d, #-13");
+ COMPARE(cmpgt(p15.VnB(), p4.Zeroing(), z23.VnB(), 13),
+ "cmpgt p15.b, p4/z, z23.b, #13");
+ COMPARE(cmpgt(p15.VnH(), p4.Zeroing(), z23.VnH(), 5),
+ "cmpgt p15.h, p4/z, z23.h, #5");
+ COMPARE(cmpgt(p15.VnS(), p4.Zeroing(), z23.VnS(), -12),
+ "cmpgt p15.s, p4/z, z23.s, #-12");
+ COMPARE(cmpgt(p15.VnD(), p4.Zeroing(), z23.VnD(), -5),
+ "cmpgt p15.d, p4/z, z23.d, #-5");
+ COMPARE(cmple(p4.VnB(), p3.Zeroing(), z5.VnB(), 12),
+ "cmple p4.b, p3/z, z5.b, #12");
+ COMPARE(cmple(p4.VnH(), p3.Zeroing(), z5.VnH(), 4),
+ "cmple p4.h, p3/z, z5.h, #4");
+ COMPARE(cmple(p4.VnS(), p3.Zeroing(), z5.VnS(), -11),
+ "cmple p4.s, p3/z, z5.s, #-11");
+ COMPARE(cmple(p4.VnD(), p3.Zeroing(), z5.VnD(), -6),
+ "cmple p4.d, p3/z, z5.d, #-6");
+ COMPARE(cmplt(p3.VnB(), p7.Zeroing(), z15.VnB(), 11),
+ "cmplt p3.b, p7/z, z15.b, #11");
+ COMPARE(cmplt(p3.VnH(), p7.Zeroing(), z15.VnH(), 3),
+ "cmplt p3.h, p7/z, z15.h, #3");
+ COMPARE(cmplt(p3.VnS(), p7.Zeroing(), z15.VnS(), -10),
+ "cmplt p3.s, p7/z, z15.s, #-10");
+ COMPARE(cmplt(p3.VnD(), p7.Zeroing(), z15.VnD(), -7),
+ "cmplt p3.d, p7/z, z15.d, #-7");
+ COMPARE(cmpne(p13.VnB(), p5.Zeroing(), z20.VnB(), 10),
+ "cmpne p13.b, p5/z, z20.b, #10");
+ COMPARE(cmpne(p13.VnH(), p5.Zeroing(), z20.VnH(), 2),
+ "cmpne p13.h, p5/z, z20.h, #2");
+ COMPARE(cmpne(p13.VnS(), p5.Zeroing(), z20.VnS(), -9),
+ "cmpne p13.s, p5/z, z20.s, #-9");
+ COMPARE(cmpne(p13.VnD(), p5.Zeroing(), z20.VnD(), -8),
+ "cmpne p13.d, p5/z, z20.d, #-8");
CLEANUP();
}
@@ -2511,38 +2581,38 @@ TEST(sve_int_compare_signed_imm) {
TEST(sve_int_compare_unsigned_imm) {
SETUP();
- COMPARE_PREFIX(cmphi(p8.VnB(), p6.Zeroing(), z1.VnB(), 127),
- "cmphi p8.b, p6/z, z1.b, #127");
- COMPARE_PREFIX(cmphi(p8.VnH(), p6.Zeroing(), z1.VnH(), 126),
- "cmphi p8.h, p6/z, z1.h, #126");
- COMPARE_PREFIX(cmphi(p8.VnS(), p6.Zeroing(), z1.VnS(), 99),
- "cmphi p8.s, p6/z, z1.s, #99");
- COMPARE_PREFIX(cmphi(p8.VnD(), p6.Zeroing(), z1.VnD(), 78),
- "cmphi p8.d, p6/z, z1.d, #78");
- COMPARE_PREFIX(cmphs(p11.VnB(), p2.Zeroing(), z8.VnB(), 67),
- "cmphs p11.b, p2/z, z8.b, #67");
- COMPARE_PREFIX(cmphs(p11.VnH(), p2.Zeroing(), z8.VnH(), 63),
- "cmphs p11.h, p2/z, z8.h, #63");
- COMPARE_PREFIX(cmphs(p11.VnS(), p2.Zeroing(), z8.VnS(), 51),
- "cmphs p11.s, p2/z, z8.s, #51");
- COMPARE_PREFIX(cmphs(p11.VnD(), p2.Zeroing(), z8.VnD(), 40),
- "cmphs p11.d, p2/z, z8.d, #40");
- COMPARE_PREFIX(cmplo(p9.VnB(), p4.Zeroing(), z4.VnB(), 32),
- "cmplo p9.b, p4/z, z4.b, #32");
- COMPARE_PREFIX(cmplo(p9.VnH(), p4.Zeroing(), z4.VnH(), 22),
- "cmplo p9.h, p4/z, z4.h, #22");
- COMPARE_PREFIX(cmplo(p9.VnS(), p4.Zeroing(), z4.VnS(), 15),
- "cmplo p9.s, p4/z, z4.s, #15");
- COMPARE_PREFIX(cmplo(p9.VnD(), p4.Zeroing(), z4.VnD(), 11),
- "cmplo p9.d, p4/z, z4.d, #11");
- COMPARE_PREFIX(cmpls(p14.VnB(), p5.Zeroing(), z9.VnB(), 7),
- "cmpls p14.b, p5/z, z9.b, #7");
- COMPARE_PREFIX(cmpls(p14.VnH(), p5.Zeroing(), z9.VnH(), 4),
- "cmpls p14.h, p5/z, z9.h, #4");
- COMPARE_PREFIX(cmpls(p14.VnS(), p5.Zeroing(), z9.VnS(), 3),
- "cmpls p14.s, p5/z, z9.s, #3");
- COMPARE_PREFIX(cmpls(p14.VnD(), p5.Zeroing(), z9.VnD(), 1),
- "cmpls p14.d, p5/z, z9.d, #1");
+ COMPARE(cmphi(p8.VnB(), p6.Zeroing(), z1.VnB(), 127),
+ "cmphi p8.b, p6/z, z1.b, #127");
+ COMPARE(cmphi(p8.VnH(), p6.Zeroing(), z1.VnH(), 126),
+ "cmphi p8.h, p6/z, z1.h, #126");
+ COMPARE(cmphi(p8.VnS(), p6.Zeroing(), z1.VnS(), 99),
+ "cmphi p8.s, p6/z, z1.s, #99");
+ COMPARE(cmphi(p8.VnD(), p6.Zeroing(), z1.VnD(), 78),
+ "cmphi p8.d, p6/z, z1.d, #78");
+ COMPARE(cmphs(p11.VnB(), p2.Zeroing(), z8.VnB(), 67),
+ "cmphs p11.b, p2/z, z8.b, #67");
+ COMPARE(cmphs(p11.VnH(), p2.Zeroing(), z8.VnH(), 63),
+ "cmphs p11.h, p2/z, z8.h, #63");
+ COMPARE(cmphs(p11.VnS(), p2.Zeroing(), z8.VnS(), 51),
+ "cmphs p11.s, p2/z, z8.s, #51");
+ COMPARE(cmphs(p11.VnD(), p2.Zeroing(), z8.VnD(), 40),
+ "cmphs p11.d, p2/z, z8.d, #40");
+ COMPARE(cmplo(p9.VnB(), p4.Zeroing(), z4.VnB(), 32),
+ "cmplo p9.b, p4/z, z4.b, #32");
+ COMPARE(cmplo(p9.VnH(), p4.Zeroing(), z4.VnH(), 22),
+ "cmplo p9.h, p4/z, z4.h, #22");
+ COMPARE(cmplo(p9.VnS(), p4.Zeroing(), z4.VnS(), 15),
+ "cmplo p9.s, p4/z, z4.s, #15");
+ COMPARE(cmplo(p9.VnD(), p4.Zeroing(), z4.VnD(), 11),
+ "cmplo p9.d, p4/z, z4.d, #11");
+ COMPARE(cmpls(p14.VnB(), p5.Zeroing(), z9.VnB(), 7),
+ "cmpls p14.b, p5/z, z9.b, #7");
+ COMPARE(cmpls(p14.VnH(), p5.Zeroing(), z9.VnH(), 4),
+ "cmpls p14.h, p5/z, z9.h, #4");
+ COMPARE(cmpls(p14.VnS(), p5.Zeroing(), z9.VnS(), 3),
+ "cmpls p14.s, p5/z, z9.s, #3");
+ COMPARE(cmpls(p14.VnD(), p5.Zeroing(), z9.VnD(), 1),
+ "cmpls p14.d, p5/z, z9.d, #1");
CLEANUP();
}
@@ -2550,146 +2620,146 @@ TEST(sve_int_compare_unsigned_imm) {
TEST(sve_int_compare_vectors) {
SETUP();
- COMPARE_PREFIX(cmpeq(p13.VnB(), p0.Zeroing(), z26.VnB(), z10.VnD()),
- "cmpeq p13.b, p0/z, z26.b, z10.d");
- COMPARE_PREFIX(cmpeq(p13.VnH(), p0.Zeroing(), z26.VnH(), z10.VnD()),
- "cmpeq p13.h, p0/z, z26.h, z10.d");
- COMPARE_PREFIX(cmpeq(p13.VnS(), p0.Zeroing(), z26.VnS(), z10.VnD()),
- "cmpeq p13.s, p0/z, z26.s, z10.d");
- COMPARE_PREFIX(cmpeq(p14.VnB(), p3.Zeroing(), z18.VnB(), z15.VnB()),
- "cmpeq p14.b, p3/z, z18.b, z15.b");
- COMPARE_PREFIX(cmpeq(p14.VnH(), p3.Zeroing(), z18.VnH(), z15.VnH()),
- "cmpeq p14.h, p3/z, z18.h, z15.h");
- COMPARE_PREFIX(cmpeq(p14.VnS(), p3.Zeroing(), z18.VnS(), z15.VnS()),
- "cmpeq p14.s, p3/z, z18.s, z15.s");
- COMPARE_PREFIX(cmpeq(p14.VnD(), p3.Zeroing(), z18.VnD(), z15.VnD()),
- "cmpeq p14.d, p3/z, z18.d, z15.d");
- COMPARE_PREFIX(cmpge(p8.VnB(), p3.Zeroing(), z13.VnB(), z0.VnD()),
- "cmpge p8.b, p3/z, z13.b, z0.d");
- COMPARE_PREFIX(cmpge(p8.VnH(), p3.Zeroing(), z13.VnH(), z0.VnD()),
- "cmpge p8.h, p3/z, z13.h, z0.d");
- COMPARE_PREFIX(cmpge(p8.VnS(), p3.Zeroing(), z13.VnS(), z0.VnD()),
- "cmpge p8.s, p3/z, z13.s, z0.d");
- COMPARE_PREFIX(cmpge(p3.VnB(), p4.Zeroing(), z6.VnB(), z1.VnB()),
- "cmpge p3.b, p4/z, z6.b, z1.b");
- COMPARE_PREFIX(cmpge(p3.VnH(), p4.Zeroing(), z6.VnH(), z1.VnH()),
- "cmpge p3.h, p4/z, z6.h, z1.h");
- COMPARE_PREFIX(cmpge(p3.VnS(), p4.Zeroing(), z6.VnS(), z1.VnS()),
- "cmpge p3.s, p4/z, z6.s, z1.s");
- COMPARE_PREFIX(cmpge(p3.VnD(), p4.Zeroing(), z6.VnD(), z1.VnD()),
- "cmpge p3.d, p4/z, z6.d, z1.d");
- COMPARE_PREFIX(cmpgt(p4.VnB(), p2.Zeroing(), z24.VnB(), z1.VnD()),
- "cmpgt p4.b, p2/z, z24.b, z1.d");
- COMPARE_PREFIX(cmpgt(p4.VnH(), p2.Zeroing(), z24.VnH(), z1.VnD()),
- "cmpgt p4.h, p2/z, z24.h, z1.d");
- COMPARE_PREFIX(cmpgt(p4.VnS(), p2.Zeroing(), z24.VnS(), z1.VnD()),
- "cmpgt p4.s, p2/z, z24.s, z1.d");
- COMPARE_PREFIX(cmpgt(p10.VnB(), p3.Zeroing(), z23.VnB(), z19.VnB()),
- "cmpgt p10.b, p3/z, z23.b, z19.b");
- COMPARE_PREFIX(cmpgt(p10.VnH(), p3.Zeroing(), z23.VnH(), z19.VnH()),
- "cmpgt p10.h, p3/z, z23.h, z19.h");
- COMPARE_PREFIX(cmpgt(p10.VnS(), p3.Zeroing(), z23.VnS(), z19.VnS()),
- "cmpgt p10.s, p3/z, z23.s, z19.s");
- COMPARE_PREFIX(cmpgt(p10.VnD(), p3.Zeroing(), z23.VnD(), z19.VnD()),
- "cmpgt p10.d, p3/z, z23.d, z19.d");
- COMPARE_PREFIX(cmphi(p10.VnB(), p6.Zeroing(), z6.VnB(), z11.VnD()),
- "cmphi p10.b, p6/z, z6.b, z11.d");
- COMPARE_PREFIX(cmphi(p10.VnH(), p6.Zeroing(), z6.VnH(), z11.VnD()),
- "cmphi p10.h, p6/z, z6.h, z11.d");
- COMPARE_PREFIX(cmphi(p10.VnS(), p6.Zeroing(), z6.VnS(), z11.VnD()),
- "cmphi p10.s, p6/z, z6.s, z11.d");
- COMPARE_PREFIX(cmphi(p1.VnB(), p0.Zeroing(), z4.VnB(), z2.VnB()),
- "cmphi p1.b, p0/z, z4.b, z2.b");
- COMPARE_PREFIX(cmphi(p1.VnH(), p0.Zeroing(), z4.VnH(), z2.VnH()),
- "cmphi p1.h, p0/z, z4.h, z2.h");
- COMPARE_PREFIX(cmphi(p1.VnS(), p0.Zeroing(), z4.VnS(), z2.VnS()),
- "cmphi p1.s, p0/z, z4.s, z2.s");
- COMPARE_PREFIX(cmphi(p1.VnD(), p0.Zeroing(), z4.VnD(), z2.VnD()),
- "cmphi p1.d, p0/z, z4.d, z2.d");
- COMPARE_PREFIX(cmphs(p10.VnB(), p5.Zeroing(), z22.VnB(), z5.VnD()),
- "cmphs p10.b, p5/z, z22.b, z5.d");
- COMPARE_PREFIX(cmphs(p10.VnH(), p5.Zeroing(), z22.VnH(), z5.VnD()),
- "cmphs p10.h, p5/z, z22.h, z5.d");
- COMPARE_PREFIX(cmphs(p10.VnS(), p5.Zeroing(), z22.VnS(), z5.VnD()),
- "cmphs p10.s, p5/z, z22.s, z5.d");
- COMPARE_PREFIX(cmphs(p12.VnB(), p6.Zeroing(), z20.VnB(), z24.VnB()),
- "cmphs p12.b, p6/z, z20.b, z24.b");
- COMPARE_PREFIX(cmphs(p12.VnH(), p6.Zeroing(), z20.VnH(), z24.VnH()),
- "cmphs p12.h, p6/z, z20.h, z24.h");
- COMPARE_PREFIX(cmphs(p12.VnS(), p6.Zeroing(), z20.VnS(), z24.VnS()),
- "cmphs p12.s, p6/z, z20.s, z24.s");
- COMPARE_PREFIX(cmphs(p12.VnD(), p6.Zeroing(), z20.VnD(), z24.VnD()),
- "cmphs p12.d, p6/z, z20.d, z24.d");
- COMPARE_PREFIX(cmple(p11.VnB(), p2.Zeroing(), z18.VnB(), z0.VnD()),
- "cmple p11.b, p2/z, z18.b, z0.d");
- COMPARE_PREFIX(cmple(p11.VnH(), p2.Zeroing(), z18.VnH(), z0.VnD()),
- "cmple p11.h, p2/z, z18.h, z0.d");
- COMPARE_PREFIX(cmple(p11.VnS(), p2.Zeroing(), z18.VnS(), z0.VnD()),
- "cmple p11.s, p2/z, z18.s, z0.d");
- COMPARE_PREFIX(cmplo(p12.VnB(), p6.Zeroing(), z21.VnB(), z10.VnD()),
- "cmplo p12.b, p6/z, z21.b, z10.d");
- COMPARE_PREFIX(cmplo(p12.VnH(), p6.Zeroing(), z21.VnH(), z10.VnD()),
- "cmplo p12.h, p6/z, z21.h, z10.d");
- COMPARE_PREFIX(cmplo(p12.VnS(), p6.Zeroing(), z21.VnS(), z10.VnD()),
- "cmplo p12.s, p6/z, z21.s, z10.d");
- COMPARE_PREFIX(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnD()),
- "cmpls p8.b, p4/z, z9.b, z15.d");
- COMPARE_PREFIX(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnD()),
- "cmpls p8.h, p4/z, z9.h, z15.d");
- COMPARE_PREFIX(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnD()),
- "cmpls p8.s, p4/z, z9.s, z15.d");
- COMPARE_PREFIX(cmplt(p6.VnB(), p6.Zeroing(), z4.VnB(), z8.VnD()),
- "cmplt p6.b, p6/z, z4.b, z8.d");
- COMPARE_PREFIX(cmplt(p6.VnH(), p6.Zeroing(), z4.VnH(), z8.VnD()),
- "cmplt p6.h, p6/z, z4.h, z8.d");
- COMPARE_PREFIX(cmplt(p6.VnS(), p6.Zeroing(), z4.VnS(), z8.VnD()),
- "cmplt p6.s, p6/z, z4.s, z8.d");
- COMPARE_PREFIX(cmpne(p1.VnB(), p6.Zeroing(), z31.VnB(), z16.VnD()),
- "cmpne p1.b, p6/z, z31.b, z16.d");
- COMPARE_PREFIX(cmpne(p1.VnH(), p6.Zeroing(), z31.VnH(), z16.VnD()),
- "cmpne p1.h, p6/z, z31.h, z16.d");
- COMPARE_PREFIX(cmpne(p1.VnS(), p6.Zeroing(), z31.VnS(), z16.VnD()),
- "cmpne p1.s, p6/z, z31.s, z16.d");
- COMPARE_PREFIX(cmpne(p11.VnB(), p1.Zeroing(), z3.VnB(), z24.VnB()),
- "cmpne p11.b, p1/z, z3.b, z24.b");
- COMPARE_PREFIX(cmpne(p11.VnH(), p1.Zeroing(), z3.VnH(), z24.VnH()),
- "cmpne p11.h, p1/z, z3.h, z24.h");
- COMPARE_PREFIX(cmpne(p11.VnS(), p1.Zeroing(), z3.VnS(), z24.VnS()),
- "cmpne p11.s, p1/z, z3.s, z24.s");
- COMPARE_PREFIX(cmpne(p11.VnD(), p1.Zeroing(), z3.VnD(), z24.VnD()),
- "cmpne p11.d, p1/z, z3.d, z24.d");
- COMPARE_PREFIX(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnB()),
- "cmphs p8.b, p4/z, z15.b, z9.b");
- COMPARE_PREFIX(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnH()),
- "cmphs p8.h, p4/z, z15.h, z9.h");
- COMPARE_PREFIX(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnS()),
- "cmphs p8.s, p4/z, z15.s, z9.s");
- COMPARE_PREFIX(cmpls(p8.VnD(), p4.Zeroing(), z9.VnD(), z15.VnD()),
- "cmphs p8.d, p4/z, z15.d, z9.d");
- COMPARE_PREFIX(cmplo(p10.VnB(), p3.Zeroing(), z14.VnB(), z20.VnB()),
- "cmphi p10.b, p3/z, z20.b, z14.b");
- COMPARE_PREFIX(cmplo(p10.VnH(), p3.Zeroing(), z14.VnH(), z20.VnH()),
- "cmphi p10.h, p3/z, z20.h, z14.h");
- COMPARE_PREFIX(cmplo(p10.VnS(), p3.Zeroing(), z14.VnS(), z20.VnS()),
- "cmphi p10.s, p3/z, z20.s, z14.s");
- COMPARE_PREFIX(cmplo(p10.VnD(), p3.Zeroing(), z14.VnD(), z20.VnD()),
- "cmphi p10.d, p3/z, z20.d, z14.d");
- COMPARE_PREFIX(cmple(p12.VnB(), p2.Zeroing(), z19.VnB(), z25.VnB()),
- "cmpge p12.b, p2/z, z25.b, z19.b");
- COMPARE_PREFIX(cmple(p12.VnH(), p2.Zeroing(), z19.VnH(), z25.VnH()),
- "cmpge p12.h, p2/z, z25.h, z19.h");
- COMPARE_PREFIX(cmple(p12.VnS(), p2.Zeroing(), z19.VnS(), z25.VnS()),
- "cmpge p12.s, p2/z, z25.s, z19.s");
- COMPARE_PREFIX(cmple(p12.VnD(), p2.Zeroing(), z19.VnD(), z25.VnD()),
- "cmpge p12.d, p2/z, z25.d, z19.d");
- COMPARE_PREFIX(cmplt(p14.VnB(), p1.Zeroing(), z24.VnB(), z30.VnB()),
- "cmpgt p14.b, p1/z, z30.b, z24.b");
- COMPARE_PREFIX(cmplt(p14.VnH(), p1.Zeroing(), z24.VnH(), z30.VnH()),
- "cmpgt p14.h, p1/z, z30.h, z24.h");
- COMPARE_PREFIX(cmplt(p14.VnS(), p1.Zeroing(), z24.VnS(), z30.VnS()),
- "cmpgt p14.s, p1/z, z30.s, z24.s");
- COMPARE_PREFIX(cmplt(p14.VnD(), p1.Zeroing(), z24.VnD(), z30.VnD()),
- "cmpgt p14.d, p1/z, z30.d, z24.d");
+ COMPARE(cmpeq(p13.VnB(), p0.Zeroing(), z26.VnB(), z10.VnD()),
+ "cmpeq p13.b, p0/z, z26.b, z10.d");
+ COMPARE(cmpeq(p13.VnH(), p0.Zeroing(), z26.VnH(), z10.VnD()),
+ "cmpeq p13.h, p0/z, z26.h, z10.d");
+ COMPARE(cmpeq(p13.VnS(), p0.Zeroing(), z26.VnS(), z10.VnD()),
+ "cmpeq p13.s, p0/z, z26.s, z10.d");
+ COMPARE(cmpeq(p14.VnB(), p3.Zeroing(), z18.VnB(), z15.VnB()),
+ "cmpeq p14.b, p3/z, z18.b, z15.b");
+ COMPARE(cmpeq(p14.VnH(), p3.Zeroing(), z18.VnH(), z15.VnH()),
+ "cmpeq p14.h, p3/z, z18.h, z15.h");
+ COMPARE(cmpeq(p14.VnS(), p3.Zeroing(), z18.VnS(), z15.VnS()),
+ "cmpeq p14.s, p3/z, z18.s, z15.s");
+ COMPARE(cmpeq(p14.VnD(), p3.Zeroing(), z18.VnD(), z15.VnD()),
+ "cmpeq p14.d, p3/z, z18.d, z15.d");
+ COMPARE(cmpge(p8.VnB(), p3.Zeroing(), z13.VnB(), z0.VnD()),
+ "cmpge p8.b, p3/z, z13.b, z0.d");
+ COMPARE(cmpge(p8.VnH(), p3.Zeroing(), z13.VnH(), z0.VnD()),
+ "cmpge p8.h, p3/z, z13.h, z0.d");
+ COMPARE(cmpge(p8.VnS(), p3.Zeroing(), z13.VnS(), z0.VnD()),
+ "cmpge p8.s, p3/z, z13.s, z0.d");
+ COMPARE(cmpge(p3.VnB(), p4.Zeroing(), z6.VnB(), z1.VnB()),
+ "cmpge p3.b, p4/z, z6.b, z1.b");
+ COMPARE(cmpge(p3.VnH(), p4.Zeroing(), z6.VnH(), z1.VnH()),
+ "cmpge p3.h, p4/z, z6.h, z1.h");
+ COMPARE(cmpge(p3.VnS(), p4.Zeroing(), z6.VnS(), z1.VnS()),
+ "cmpge p3.s, p4/z, z6.s, z1.s");
+ COMPARE(cmpge(p3.VnD(), p4.Zeroing(), z6.VnD(), z1.VnD()),
+ "cmpge p3.d, p4/z, z6.d, z1.d");
+ COMPARE(cmpgt(p4.VnB(), p2.Zeroing(), z24.VnB(), z1.VnD()),
+ "cmpgt p4.b, p2/z, z24.b, z1.d");
+ COMPARE(cmpgt(p4.VnH(), p2.Zeroing(), z24.VnH(), z1.VnD()),
+ "cmpgt p4.h, p2/z, z24.h, z1.d");
+ COMPARE(cmpgt(p4.VnS(), p2.Zeroing(), z24.VnS(), z1.VnD()),
+ "cmpgt p4.s, p2/z, z24.s, z1.d");
+ COMPARE(cmpgt(p10.VnB(), p3.Zeroing(), z23.VnB(), z19.VnB()),
+ "cmpgt p10.b, p3/z, z23.b, z19.b");
+ COMPARE(cmpgt(p10.VnH(), p3.Zeroing(), z23.VnH(), z19.VnH()),
+ "cmpgt p10.h, p3/z, z23.h, z19.h");
+ COMPARE(cmpgt(p10.VnS(), p3.Zeroing(), z23.VnS(), z19.VnS()),
+ "cmpgt p10.s, p3/z, z23.s, z19.s");
+ COMPARE(cmpgt(p10.VnD(), p3.Zeroing(), z23.VnD(), z19.VnD()),
+ "cmpgt p10.d, p3/z, z23.d, z19.d");
+ COMPARE(cmphi(p10.VnB(), p6.Zeroing(), z6.VnB(), z11.VnD()),
+ "cmphi p10.b, p6/z, z6.b, z11.d");
+ COMPARE(cmphi(p10.VnH(), p6.Zeroing(), z6.VnH(), z11.VnD()),
+ "cmphi p10.h, p6/z, z6.h, z11.d");
+ COMPARE(cmphi(p10.VnS(), p6.Zeroing(), z6.VnS(), z11.VnD()),
+ "cmphi p10.s, p6/z, z6.s, z11.d");
+ COMPARE(cmphi(p1.VnB(), p0.Zeroing(), z4.VnB(), z2.VnB()),
+ "cmphi p1.b, p0/z, z4.b, z2.b");
+ COMPARE(cmphi(p1.VnH(), p0.Zeroing(), z4.VnH(), z2.VnH()),
+ "cmphi p1.h, p0/z, z4.h, z2.h");
+ COMPARE(cmphi(p1.VnS(), p0.Zeroing(), z4.VnS(), z2.VnS()),
+ "cmphi p1.s, p0/z, z4.s, z2.s");
+ COMPARE(cmphi(p1.VnD(), p0.Zeroing(), z4.VnD(), z2.VnD()),
+ "cmphi p1.d, p0/z, z4.d, z2.d");
+ COMPARE(cmphs(p10.VnB(), p5.Zeroing(), z22.VnB(), z5.VnD()),
+ "cmphs p10.b, p5/z, z22.b, z5.d");
+ COMPARE(cmphs(p10.VnH(), p5.Zeroing(), z22.VnH(), z5.VnD()),
+ "cmphs p10.h, p5/z, z22.h, z5.d");
+ COMPARE(cmphs(p10.VnS(), p5.Zeroing(), z22.VnS(), z5.VnD()),
+ "cmphs p10.s, p5/z, z22.s, z5.d");
+ COMPARE(cmphs(p12.VnB(), p6.Zeroing(), z20.VnB(), z24.VnB()),
+ "cmphs p12.b, p6/z, z20.b, z24.b");
+ COMPARE(cmphs(p12.VnH(), p6.Zeroing(), z20.VnH(), z24.VnH()),
+ "cmphs p12.h, p6/z, z20.h, z24.h");
+ COMPARE(cmphs(p12.VnS(), p6.Zeroing(), z20.VnS(), z24.VnS()),
+ "cmphs p12.s, p6/z, z20.s, z24.s");
+ COMPARE(cmphs(p12.VnD(), p6.Zeroing(), z20.VnD(), z24.VnD()),
+ "cmphs p12.d, p6/z, z20.d, z24.d");
+ COMPARE(cmple(p11.VnB(), p2.Zeroing(), z18.VnB(), z0.VnD()),
+ "cmple p11.b, p2/z, z18.b, z0.d");
+ COMPARE(cmple(p11.VnH(), p2.Zeroing(), z18.VnH(), z0.VnD()),
+ "cmple p11.h, p2/z, z18.h, z0.d");
+ COMPARE(cmple(p11.VnS(), p2.Zeroing(), z18.VnS(), z0.VnD()),
+ "cmple p11.s, p2/z, z18.s, z0.d");
+ COMPARE(cmplo(p12.VnB(), p6.Zeroing(), z21.VnB(), z10.VnD()),
+ "cmplo p12.b, p6/z, z21.b, z10.d");
+ COMPARE(cmplo(p12.VnH(), p6.Zeroing(), z21.VnH(), z10.VnD()),
+ "cmplo p12.h, p6/z, z21.h, z10.d");
+ COMPARE(cmplo(p12.VnS(), p6.Zeroing(), z21.VnS(), z10.VnD()),
+ "cmplo p12.s, p6/z, z21.s, z10.d");
+ COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnD()),
+ "cmpls p8.b, p4/z, z9.b, z15.d");
+ COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnD()),
+ "cmpls p8.h, p4/z, z9.h, z15.d");
+ COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnD()),
+ "cmpls p8.s, p4/z, z9.s, z15.d");
+ COMPARE(cmplt(p6.VnB(), p6.Zeroing(), z4.VnB(), z8.VnD()),
+ "cmplt p6.b, p6/z, z4.b, z8.d");
+ COMPARE(cmplt(p6.VnH(), p6.Zeroing(), z4.VnH(), z8.VnD()),
+ "cmplt p6.h, p6/z, z4.h, z8.d");
+ COMPARE(cmplt(p6.VnS(), p6.Zeroing(), z4.VnS(), z8.VnD()),
+ "cmplt p6.s, p6/z, z4.s, z8.d");
+ COMPARE(cmpne(p1.VnB(), p6.Zeroing(), z31.VnB(), z16.VnD()),
+ "cmpne p1.b, p6/z, z31.b, z16.d");
+ COMPARE(cmpne(p1.VnH(), p6.Zeroing(), z31.VnH(), z16.VnD()),
+ "cmpne p1.h, p6/z, z31.h, z16.d");
+ COMPARE(cmpne(p1.VnS(), p6.Zeroing(), z31.VnS(), z16.VnD()),
+ "cmpne p1.s, p6/z, z31.s, z16.d");
+ COMPARE(cmpne(p11.VnB(), p1.Zeroing(), z3.VnB(), z24.VnB()),
+ "cmpne p11.b, p1/z, z3.b, z24.b");
+ COMPARE(cmpne(p11.VnH(), p1.Zeroing(), z3.VnH(), z24.VnH()),
+ "cmpne p11.h, p1/z, z3.h, z24.h");
+ COMPARE(cmpne(p11.VnS(), p1.Zeroing(), z3.VnS(), z24.VnS()),
+ "cmpne p11.s, p1/z, z3.s, z24.s");
+ COMPARE(cmpne(p11.VnD(), p1.Zeroing(), z3.VnD(), z24.VnD()),
+ "cmpne p11.d, p1/z, z3.d, z24.d");
+ COMPARE(cmpls(p8.VnB(), p4.Zeroing(), z9.VnB(), z15.VnB()),
+ "cmphs p8.b, p4/z, z15.b, z9.b");
+ COMPARE(cmpls(p8.VnH(), p4.Zeroing(), z9.VnH(), z15.VnH()),
+ "cmphs p8.h, p4/z, z15.h, z9.h");
+ COMPARE(cmpls(p8.VnS(), p4.Zeroing(), z9.VnS(), z15.VnS()),
+ "cmphs p8.s, p4/z, z15.s, z9.s");
+ COMPARE(cmpls(p8.VnD(), p4.Zeroing(), z9.VnD(), z15.VnD()),
+ "cmphs p8.d, p4/z, z15.d, z9.d");
+ COMPARE(cmplo(p10.VnB(), p3.Zeroing(), z14.VnB(), z20.VnB()),
+ "cmphi p10.b, p3/z, z20.b, z14.b");
+ COMPARE(cmplo(p10.VnH(), p3.Zeroing(), z14.VnH(), z20.VnH()),
+ "cmphi p10.h, p3/z, z20.h, z14.h");
+ COMPARE(cmplo(p10.VnS(), p3.Zeroing(), z14.VnS(), z20.VnS()),
+ "cmphi p10.s, p3/z, z20.s, z14.s");
+ COMPARE(cmplo(p10.VnD(), p3.Zeroing(), z14.VnD(), z20.VnD()),
+ "cmphi p10.d, p3/z, z20.d, z14.d");
+ COMPARE(cmple(p12.VnB(), p2.Zeroing(), z19.VnB(), z25.VnB()),
+ "cmpge p12.b, p2/z, z25.b, z19.b");
+ COMPARE(cmple(p12.VnH(), p2.Zeroing(), z19.VnH(), z25.VnH()),
+ "cmpge p12.h, p2/z, z25.h, z19.h");
+ COMPARE(cmple(p12.VnS(), p2.Zeroing(), z19.VnS(), z25.VnS()),
+ "cmpge p12.s, p2/z, z25.s, z19.s");
+ COMPARE(cmple(p12.VnD(), p2.Zeroing(), z19.VnD(), z25.VnD()),
+ "cmpge p12.d, p2/z, z25.d, z19.d");
+ COMPARE(cmplt(p14.VnB(), p1.Zeroing(), z24.VnB(), z30.VnB()),
+ "cmpgt p14.b, p1/z, z30.b, z24.b");
+ COMPARE(cmplt(p14.VnH(), p1.Zeroing(), z24.VnH(), z30.VnH()),
+ "cmpgt p14.h, p1/z, z30.h, z24.h");
+ COMPARE(cmplt(p14.VnS(), p1.Zeroing(), z24.VnS(), z30.VnS()),
+ "cmpgt p14.s, p1/z, z30.s, z24.s");
+ COMPARE(cmplt(p14.VnD(), p1.Zeroing(), z24.VnD(), z30.VnD()),
+ "cmpgt p14.d, p1/z, z30.d, z24.d");
CLEANUP();
}
@@ -2697,16 +2767,16 @@ TEST(sve_int_compare_vectors) {
TEST(sve_int_misc_unpredicated) {
SETUP();
- COMPARE_PREFIX(fexpa(z5.VnH(), z9.VnH()), "fexpa z5.h, z9.h");
- COMPARE_PREFIX(fexpa(z5.VnS(), z9.VnS()), "fexpa z5.s, z9.s");
- COMPARE_PREFIX(fexpa(z5.VnD(), z9.VnD()), "fexpa z5.d, z9.d");
- COMPARE_PREFIX(ftssel(z17.VnH(), z24.VnH(), z14.VnH()),
- "ftssel z17.h, z24.h, z14.h");
- COMPARE_PREFIX(ftssel(z17.VnS(), z24.VnS(), z14.VnS()),
- "ftssel z17.s, z24.s, z14.s");
- COMPARE_PREFIX(ftssel(z17.VnD(), z24.VnD(), z14.VnD()),
- "ftssel z17.d, z24.d, z14.d");
- COMPARE_PREFIX(movprfx(z24, z1), "movprfx z24, z1");
+ COMPARE(fexpa(z5.VnH(), z9.VnH()), "fexpa z5.h, z9.h");
+ COMPARE(fexpa(z5.VnS(), z9.VnS()), "fexpa z5.s, z9.s");
+ COMPARE(fexpa(z5.VnD(), z9.VnD()), "fexpa z5.d, z9.d");
+ COMPARE(ftssel(z17.VnH(), z24.VnH(), z14.VnH()),
+ "ftssel z17.h, z24.h, z14.h");
+ COMPARE(ftssel(z17.VnS(), z24.VnS(), z14.VnS()),
+ "ftssel z17.s, z24.s, z14.s");
+ COMPARE(ftssel(z17.VnD(), z24.VnD(), z14.VnD()),
+ "ftssel z17.d, z24.d, z14.d");
+ COMPARE(movprfx(z24, z1), "movprfx z24, z1");
CLEANUP();
}
@@ -2714,38 +2784,38 @@ TEST(sve_int_misc_unpredicated) {
TEST(sve_int_mul_add_predicated) {
SETUP();
- COMPARE_PREFIX(mad(z29.VnB(), p6.Merging(), z22.VnB(), z21.VnB()),
- "mad z29.b, p6/m, z22.b, z21.b");
- COMPARE_PREFIX(mad(z29.VnH(), p6.Merging(), z22.VnH(), z21.VnH()),
- "mad z29.h, p6/m, z22.h, z21.h");
- COMPARE_PREFIX(mad(z29.VnS(), p6.Merging(), z22.VnS(), z21.VnS()),
- "mad z29.s, p6/m, z22.s, z21.s");
- COMPARE_PREFIX(mad(z29.VnD(), p6.Merging(), z22.VnD(), z21.VnD()),
- "mad z29.d, p6/m, z22.d, z21.d");
- COMPARE_PREFIX(mla(z23.VnB(), p1.Merging(), z21.VnB(), z23.VnB()),
- "mla z23.b, p1/m, z21.b, z23.b");
- COMPARE_PREFIX(mla(z23.VnH(), p1.Merging(), z21.VnH(), z23.VnH()),
- "mla z23.h, p1/m, z21.h, z23.h");
- COMPARE_PREFIX(mla(z23.VnS(), p1.Merging(), z21.VnS(), z23.VnS()),
- "mla z23.s, p1/m, z21.s, z23.s");
- COMPARE_PREFIX(mla(z23.VnD(), p1.Merging(), z21.VnD(), z23.VnD()),
- "mla z23.d, p1/m, z21.d, z23.d");
- COMPARE_PREFIX(mls(z4.VnB(), p6.Merging(), z17.VnB(), z28.VnB()),
- "mls z4.b, p6/m, z17.b, z28.b");
- COMPARE_PREFIX(mls(z4.VnH(), p6.Merging(), z17.VnH(), z28.VnH()),
- "mls z4.h, p6/m, z17.h, z28.h");
- COMPARE_PREFIX(mls(z4.VnS(), p6.Merging(), z17.VnS(), z28.VnS()),
- "mls z4.s, p6/m, z17.s, z28.s");
- COMPARE_PREFIX(mls(z4.VnD(), p6.Merging(), z17.VnD(), z28.VnD()),
- "mls z4.d, p6/m, z17.d, z28.d");
- COMPARE_PREFIX(msb(z27.VnB(), p7.Merging(), z29.VnB(), z1.VnB()),
- "msb z27.b, p7/m, z29.b, z1.b");
- COMPARE_PREFIX(msb(z27.VnH(), p7.Merging(), z29.VnH(), z1.VnH()),
- "msb z27.h, p7/m, z29.h, z1.h");
- COMPARE_PREFIX(msb(z27.VnS(), p7.Merging(), z29.VnS(), z1.VnS()),
- "msb z27.s, p7/m, z29.s, z1.s");
- COMPARE_PREFIX(msb(z27.VnD(), p7.Merging(), z29.VnD(), z1.VnD()),
- "msb z27.d, p7/m, z29.d, z1.d");
+ COMPARE(mad(z29.VnB(), p6.Merging(), z22.VnB(), z21.VnB()),
+ "mad z29.b, p6/m, z22.b, z21.b");
+ COMPARE(mad(z29.VnH(), p6.Merging(), z22.VnH(), z21.VnH()),
+ "mad z29.h, p6/m, z22.h, z21.h");
+ COMPARE(mad(z29.VnS(), p6.Merging(), z22.VnS(), z21.VnS()),
+ "mad z29.s, p6/m, z22.s, z21.s");
+ COMPARE(mad(z29.VnD(), p6.Merging(), z22.VnD(), z21.VnD()),
+ "mad z29.d, p6/m, z22.d, z21.d");
+ COMPARE(mla(z23.VnB(), p1.Merging(), z21.VnB(), z23.VnB()),
+ "mla z23.b, p1/m, z21.b, z23.b");
+ COMPARE(mla(z23.VnH(), p1.Merging(), z21.VnH(), z23.VnH()),
+ "mla z23.h, p1/m, z21.h, z23.h");
+ COMPARE(mla(z23.VnS(), p1.Merging(), z21.VnS(), z23.VnS()),
+ "mla z23.s, p1/m, z21.s, z23.s");
+ COMPARE(mla(z23.VnD(), p1.Merging(), z21.VnD(), z23.VnD()),
+ "mla z23.d, p1/m, z21.d, z23.d");
+ COMPARE(mls(z4.VnB(), p6.Merging(), z17.VnB(), z28.VnB()),
+ "mls z4.b, p6/m, z17.b, z28.b");
+ COMPARE(mls(z4.VnH(), p6.Merging(), z17.VnH(), z28.VnH()),
+ "mls z4.h, p6/m, z17.h, z28.h");
+ COMPARE(mls(z4.VnS(), p6.Merging(), z17.VnS(), z28.VnS()),
+ "mls z4.s, p6/m, z17.s, z28.s");
+ COMPARE(mls(z4.VnD(), p6.Merging(), z17.VnD(), z28.VnD()),
+ "mls z4.d, p6/m, z17.d, z28.d");
+ COMPARE(msb(z27.VnB(), p7.Merging(), z29.VnB(), z1.VnB()),
+ "msb z27.b, p7/m, z29.b, z1.b");
+ COMPARE(msb(z27.VnH(), p7.Merging(), z29.VnH(), z1.VnH()),
+ "msb z27.h, p7/m, z29.h, z1.h");
+ COMPARE(msb(z27.VnS(), p7.Merging(), z29.VnS(), z1.VnS()),
+ "msb z27.s, p7/m, z29.s, z1.s");
+ COMPARE(msb(z27.VnD(), p7.Merging(), z29.VnD(), z1.VnD()),
+ "msb z27.d, p7/m, z29.d, z1.d");
CLEANUP();
}
@@ -2779,14 +2849,10 @@ TEST(sve_int_mul_add_predicated_macro) {
TEST(sve_int_mul_add_unpredicated) {
SETUP();
- COMPARE_PREFIX(sdot(z13.VnS(), z12.VnB(), z12.VnB()),
- "sdot z13.s, z12.b, z12.b");
- COMPARE_PREFIX(sdot(z18.VnD(), z27.VnH(), z22.VnH()),
- "sdot z18.d, z27.h, z22.h");
- COMPARE_PREFIX(udot(z23.VnS(), z22.VnB(), z11.VnB()),
- "udot z23.s, z22.b, z11.b");
- COMPARE_PREFIX(udot(z21.VnD(), z27.VnH(), z27.VnH()),
- "udot z21.d, z27.h, z27.h");
+ COMPARE(sdot(z13.VnS(), z12.VnB(), z12.VnB()), "sdot z13.s, z12.b, z12.b");
+ COMPARE(sdot(z18.VnD(), z27.VnH(), z22.VnH()), "sdot z18.d, z27.h, z22.h");
+ COMPARE(udot(z23.VnS(), z22.VnB(), z11.VnB()), "udot z23.s, z22.b, z11.b");
+ COMPARE(udot(z21.VnD(), z27.VnH(), z27.VnH()), "udot z21.d, z27.h, z27.h");
CLEANUP();
}
@@ -2831,49 +2897,49 @@ TEST(sve_int_mul_add_unpredicated_macro) {
TEST(sve_int_reduction) {
SETUP();
- COMPARE_PREFIX(andv(b15, p1, z4.VnB()), "andv b15, p1, z4.b");
- COMPARE_PREFIX(andv(h14, p2, z3.VnH()), "andv h14, p2, z3.h");
- COMPARE_PREFIX(andv(s13, p3, z2.VnS()), "andv s13, p3, z2.s");
- COMPARE_PREFIX(andv(d12, p4, z1.VnD()), "andv d12, p4, z1.d");
- COMPARE_PREFIX(eorv(b12, p0, z30.VnB()), "eorv b12, p0, z30.b");
- COMPARE_PREFIX(eorv(h11, p1, z29.VnH()), "eorv h11, p1, z29.h");
- COMPARE_PREFIX(eorv(s10, p2, z28.VnS()), "eorv s10, p2, z28.s");
- COMPARE_PREFIX(eorv(d9, p3, z27.VnD()), "eorv d9, p3, z27.d");
- COMPARE_PREFIX(movprfx(z30.VnB(), p2.Zeroing(), z23.VnB()),
- "movprfx z30.b, p2/z, z23.b");
- COMPARE_PREFIX(movprfx(z10.VnH(), p0.Merging(), z10.VnH()),
- "movprfx z10.h, p0/m, z10.h");
- COMPARE_PREFIX(movprfx(z0.VnS(), p2.Zeroing(), z23.VnS()),
- "movprfx z0.s, p2/z, z23.s");
- COMPARE_PREFIX(movprfx(z31.VnD(), p7.Merging(), z23.VnD()),
- "movprfx z31.d, p7/m, z23.d");
- COMPARE_PREFIX(orv(b4, p0, z16.VnB()), "orv b4, p0, z16.b");
- COMPARE_PREFIX(orv(h6, p2, z18.VnH()), "orv h6, p2, z18.h");
- COMPARE_PREFIX(orv(s8, p4, z20.VnS()), "orv s8, p4, z20.s");
- COMPARE_PREFIX(orv(d10, p6, z22.VnD()), "orv d10, p6, z22.d");
- COMPARE_PREFIX(saddv(d20, p1, z12.VnB()), "saddv d20, p1, z12.b");
- COMPARE_PREFIX(saddv(d22, p3, z15.VnH()), "saddv d22, p3, z15.h");
- COMPARE_PREFIX(saddv(d24, p5, z18.VnS()), "saddv d24, p5, z18.s");
- COMPARE_PREFIX(smaxv(b9, p3, z1.VnB()), "smaxv b9, p3, z1.b");
- COMPARE_PREFIX(smaxv(h19, p2, z1.VnH()), "smaxv h19, p2, z1.h");
- COMPARE_PREFIX(smaxv(s29, p1, z1.VnS()), "smaxv s29, p1, z1.s");
- COMPARE_PREFIX(smaxv(d9, p0, z1.VnD()), "smaxv d9, p0, z1.d");
- COMPARE_PREFIX(sminv(b8, p3, z14.VnB()), "sminv b8, p3, z14.b");
- COMPARE_PREFIX(sminv(h18, p2, z4.VnH()), "sminv h18, p2, z4.h");
- COMPARE_PREFIX(sminv(s28, p1, z4.VnS()), "sminv s28, p1, z4.s");
- COMPARE_PREFIX(sminv(d8, p0, z24.VnD()), "sminv d8, p0, z24.d");
- COMPARE_PREFIX(uaddv(d13, p0, z15.VnB()), "uaddv d13, p0, z15.b");
- COMPARE_PREFIX(uaddv(d15, p2, z20.VnH()), "uaddv d15, p2, z20.h");
- COMPARE_PREFIX(uaddv(d17, p4, z25.VnS()), "uaddv d17, p4, z25.s");
- COMPARE_PREFIX(uaddv(d19, p6, z30.VnD()), "uaddv d19, p6, z30.d");
- COMPARE_PREFIX(umaxv(b28, p3, z4.VnB()), "umaxv b28, p3, z4.b");
- COMPARE_PREFIX(umaxv(h18, p6, z2.VnH()), "umaxv h18, p6, z2.h");
- COMPARE_PREFIX(umaxv(s18, p4, z29.VnS()), "umaxv s18, p4, z29.s");
- COMPARE_PREFIX(umaxv(d28, p1, z24.VnD()), "umaxv d28, p1, z24.d");
- COMPARE_PREFIX(uminv(b16, p3, z0.VnB()), "uminv b16, p3, z0.b");
- COMPARE_PREFIX(uminv(h16, p2, z3.VnH()), "uminv h16, p2, z3.h");
- COMPARE_PREFIX(uminv(s16, p1, z5.VnS()), "uminv s16, p1, z5.s");
- COMPARE_PREFIX(uminv(d16, p7, z7.VnD()), "uminv d16, p7, z7.d");
+ COMPARE(andv(b15, p1, z4.VnB()), "andv b15, p1, z4.b");
+ COMPARE(andv(h14, p2, z3.VnH()), "andv h14, p2, z3.h");
+ COMPARE(andv(s13, p3, z2.VnS()), "andv s13, p3, z2.s");
+ COMPARE(andv(d12, p4, z1.VnD()), "andv d12, p4, z1.d");
+ COMPARE(eorv(b12, p0, z30.VnB()), "eorv b12, p0, z30.b");
+ COMPARE(eorv(h11, p1, z29.VnH()), "eorv h11, p1, z29.h");
+ COMPARE(eorv(s10, p2, z28.VnS()), "eorv s10, p2, z28.s");
+ COMPARE(eorv(d9, p3, z27.VnD()), "eorv d9, p3, z27.d");
+ COMPARE(movprfx(z30.VnB(), p2.Zeroing(), z23.VnB()),
+ "movprfx z30.b, p2/z, z23.b");
+ COMPARE(movprfx(z10.VnH(), p0.Merging(), z10.VnH()),
+ "movprfx z10.h, p0/m, z10.h");
+ COMPARE(movprfx(z0.VnS(), p2.Zeroing(), z23.VnS()),
+ "movprfx z0.s, p2/z, z23.s");
+ COMPARE(movprfx(z31.VnD(), p7.Merging(), z23.VnD()),
+ "movprfx z31.d, p7/m, z23.d");
+ COMPARE(orv(b4, p0, z16.VnB()), "orv b4, p0, z16.b");
+ COMPARE(orv(h6, p2, z18.VnH()), "orv h6, p2, z18.h");
+ COMPARE(orv(s8, p4, z20.VnS()), "orv s8, p4, z20.s");
+ COMPARE(orv(d10, p6, z22.VnD()), "orv d10, p6, z22.d");
+ COMPARE(saddv(d20, p1, z12.VnB()), "saddv d20, p1, z12.b");
+ COMPARE(saddv(d22, p3, z15.VnH()), "saddv d22, p3, z15.h");
+ COMPARE(saddv(d24, p5, z18.VnS()), "saddv d24, p5, z18.s");
+ COMPARE(smaxv(b9, p3, z1.VnB()), "smaxv b9, p3, z1.b");
+ COMPARE(smaxv(h19, p2, z1.VnH()), "smaxv h19, p2, z1.h");
+ COMPARE(smaxv(s29, p1, z1.VnS()), "smaxv s29, p1, z1.s");
+ COMPARE(smaxv(d9, p0, z1.VnD()), "smaxv d9, p0, z1.d");
+ COMPARE(sminv(b8, p3, z14.VnB()), "sminv b8, p3, z14.b");
+ COMPARE(sminv(h18, p2, z4.VnH()), "sminv h18, p2, z4.h");
+ COMPARE(sminv(s28, p1, z4.VnS()), "sminv s28, p1, z4.s");
+ COMPARE(sminv(d8, p0, z24.VnD()), "sminv d8, p0, z24.d");
+ COMPARE(uaddv(d13, p0, z15.VnB()), "uaddv d13, p0, z15.b");
+ COMPARE(uaddv(d15, p2, z20.VnH()), "uaddv d15, p2, z20.h");
+ COMPARE(uaddv(d17, p4, z25.VnS()), "uaddv d17, p4, z25.s");
+ COMPARE(uaddv(d19, p6, z30.VnD()), "uaddv d19, p6, z30.d");
+ COMPARE(umaxv(b28, p3, z4.VnB()), "umaxv b28, p3, z4.b");
+ COMPARE(umaxv(h18, p6, z2.VnH()), "umaxv h18, p6, z2.h");
+ COMPARE(umaxv(s18, p4, z29.VnS()), "umaxv s18, p4, z29.s");
+ COMPARE(umaxv(d28, p1, z24.VnD()), "umaxv d28, p1, z24.d");
+ COMPARE(uminv(b16, p3, z0.VnB()), "uminv b16, p3, z0.b");
+ COMPARE(uminv(h16, p2, z3.VnH()), "uminv h16, p2, z3.h");
+ COMPARE(uminv(s16, p1, z5.VnS()), "uminv s16, p1, z5.s");
+ COMPARE(uminv(d16, p7, z7.VnD()), "uminv d16, p7, z7.d");
CLEANUP();
}
@@ -2881,97 +2947,70 @@ TEST(sve_int_reduction) {
TEST(sve_int_unary_arithmetic_predicated) {
SETUP();
- COMPARE_PREFIX(abs(z5.VnB(), p5.Merging(), z31.VnB()),
- "abs z5.b, p5/m, z31.b");
- COMPARE_PREFIX(abs(z29.VnH(), p5.Merging(), z17.VnH()),
- "abs z29.h, p5/m, z17.h");
- COMPARE_PREFIX(abs(z6.VnS(), p4.Merging(), z24.VnS()),
- "abs z6.s, p4/m, z24.s");
- COMPARE_PREFIX(abs(z19.VnD(), p3.Merging(), z25.VnD()),
- "abs z19.d, p3/m, z25.d");
- COMPARE_PREFIX(cls(z4.VnB(), p0.Merging(), z20.VnB()),
- "cls z4.b, p0/m, z20.b");
- COMPARE_PREFIX(cls(z11.VnH(), p0.Merging(), z26.VnH()),
- "cls z11.h, p0/m, z26.h");
- COMPARE_PREFIX(cls(z10.VnS(), p1.Merging(), z10.VnS()),
- "cls z10.s, p1/m, z10.s");
- COMPARE_PREFIX(cls(z5.VnD(), p1.Merging(), z4.VnD()), "cls z5.d, p1/m, z4.d");
- COMPARE_PREFIX(clz(z18.VnB(), p3.Merging(), z1.VnB()),
- "clz z18.b, p3/m, z1.b");
- COMPARE_PREFIX(clz(z13.VnH(), p4.Merging(), z18.VnH()),
- "clz z13.h, p4/m, z18.h");
- COMPARE_PREFIX(clz(z15.VnS(), p4.Merging(), z24.VnS()),
- "clz z15.s, p4/m, z24.s");
- COMPARE_PREFIX(clz(z29.VnD(), p2.Merging(), z22.VnD()),
- "clz z29.d, p2/m, z22.d");
- COMPARE_PREFIX(cnot(z16.VnB(), p6.Merging(), z20.VnB()),
- "cnot z16.b, p6/m, z20.b");
- COMPARE_PREFIX(cnot(z10.VnH(), p5.Merging(), z12.VnH()),
- "cnot z10.h, p5/m, z12.h");
- COMPARE_PREFIX(cnot(z8.VnS(), p5.Merging(), z21.VnS()),
- "cnot z8.s, p5/m, z21.s");
- COMPARE_PREFIX(cnot(z3.VnD(), p3.Merging(), z18.VnD()),
- "cnot z3.d, p3/m, z18.d");
- COMPARE_PREFIX(cnt(z29.VnB(), p3.Merging(), z7.VnB()),
- "cnt z29.b, p3/m, z7.b");
- COMPARE_PREFIX(cnt(z3.VnH(), p6.Merging(), z31.VnH()),
- "cnt z3.h, p6/m, z31.h");
- COMPARE_PREFIX(cnt(z2.VnS(), p4.Merging(), z16.VnS()),
- "cnt z2.s, p4/m, z16.s");
- COMPARE_PREFIX(cnt(z0.VnD(), p0.Merging(), z24.VnD()),
- "cnt z0.d, p0/m, z24.d");
- COMPARE_PREFIX(fabs(z17.VnH(), p7.Merging(), z15.VnH()),
- "fabs z17.h, p7/m, z15.h");
- COMPARE_PREFIX(fabs(z18.VnS(), p0.Merging(), z29.VnS()),
- "fabs z18.s, p0/m, z29.s");
- COMPARE_PREFIX(fabs(z17.VnD(), p1.Merging(), z9.VnD()),
- "fabs z17.d, p1/m, z9.d");
- COMPARE_PREFIX(fneg(z25.VnH(), p1.Merging(), z28.VnH()),
- "fneg z25.h, p1/m, z28.h");
- COMPARE_PREFIX(fneg(z5.VnS(), p1.Merging(), z25.VnS()),
- "fneg z5.s, p1/m, z25.s");
- COMPARE_PREFIX(fneg(z6.VnD(), p1.Merging(), z17.VnD()),
- "fneg z6.d, p1/m, z17.d");
- COMPARE_PREFIX(neg(z25.VnB(), p4.Merging(), z8.VnB()),
- "neg z25.b, p4/m, z8.b");
- COMPARE_PREFIX(neg(z30.VnH(), p3.Merging(), z23.VnH()),
- "neg z30.h, p3/m, z23.h");
- COMPARE_PREFIX(neg(z7.VnS(), p2.Merging(), z26.VnS()),
- "neg z7.s, p2/m, z26.s");
- COMPARE_PREFIX(neg(z21.VnD(), p3.Merging(), z5.VnD()),
- "neg z21.d, p3/m, z5.d");
- COMPARE_PREFIX(not_(z24.VnB(), p1.Merging(), z27.VnB()),
- "not z24.b, p1/m, z27.b");
- COMPARE_PREFIX(not_(z31.VnH(), p6.Merging(), z19.VnH()),
- "not z31.h, p6/m, z19.h");
- COMPARE_PREFIX(not_(z18.VnS(), p5.Merging(), z13.VnS()),
- "not z18.s, p5/m, z13.s");
- COMPARE_PREFIX(not_(z12.VnD(), p2.Merging(), z28.VnD()),
- "not z12.d, p2/m, z28.d");
- COMPARE_PREFIX(sxtb(z19.VnH(), p7.Merging(), z3.VnH()),
- "sxtb z19.h, p7/m, z3.h");
- COMPARE_PREFIX(sxtb(z3.VnS(), p1.Merging(), z17.VnS()),
- "sxtb z3.s, p1/m, z17.s");
- COMPARE_PREFIX(sxtb(z27.VnD(), p0.Merging(), z12.VnD()),
- "sxtb z27.d, p0/m, z12.d");
- COMPARE_PREFIX(sxth(z6.VnS(), p1.Merging(), z17.VnS()),
- "sxth z6.s, p1/m, z17.s");
- COMPARE_PREFIX(sxth(z8.VnD(), p6.Merging(), z2.VnD()),
- "sxth z8.d, p6/m, z2.d");
- COMPARE_PREFIX(sxtw(z13.VnD(), p3.Merging(), z27.VnD()),
- "sxtw z13.d, p3/m, z27.d");
- COMPARE_PREFIX(uxtb(z23.VnH(), p3.Merging(), z21.VnH()),
- "uxtb z23.h, p3/m, z21.h");
- COMPARE_PREFIX(uxtb(z0.VnS(), p2.Merging(), z13.VnS()),
- "uxtb z0.s, p2/m, z13.s");
- COMPARE_PREFIX(uxtb(z1.VnD(), p3.Merging(), z13.VnD()),
- "uxtb z1.d, p3/m, z13.d");
- COMPARE_PREFIX(uxth(z27.VnS(), p0.Merging(), z29.VnS()),
- "uxth z27.s, p0/m, z29.s");
- COMPARE_PREFIX(uxth(z22.VnD(), p4.Merging(), z20.VnD()),
- "uxth z22.d, p4/m, z20.d");
- COMPARE_PREFIX(uxtw(z14.VnD(), p1.Merging(), z13.VnD()),
- "uxtw z14.d, p1/m, z13.d");
+ COMPARE(abs(z5.VnB(), p5.Merging(), z31.VnB()), "abs z5.b, p5/m, z31.b");
+ COMPARE(abs(z29.VnH(), p5.Merging(), z17.VnH()), "abs z29.h, p5/m, z17.h");
+ COMPARE(abs(z6.VnS(), p4.Merging(), z24.VnS()), "abs z6.s, p4/m, z24.s");
+ COMPARE(abs(z19.VnD(), p3.Merging(), z25.VnD()), "abs z19.d, p3/m, z25.d");
+ COMPARE(cls(z4.VnB(), p0.Merging(), z20.VnB()), "cls z4.b, p0/m, z20.b");
+ COMPARE(cls(z11.VnH(), p0.Merging(), z26.VnH()), "cls z11.h, p0/m, z26.h");
+ COMPARE(cls(z10.VnS(), p1.Merging(), z10.VnS()), "cls z10.s, p1/m, z10.s");
+ COMPARE(cls(z5.VnD(), p1.Merging(), z4.VnD()), "cls z5.d, p1/m, z4.d");
+ COMPARE(clz(z18.VnB(), p3.Merging(), z1.VnB()), "clz z18.b, p3/m, z1.b");
+ COMPARE(clz(z13.VnH(), p4.Merging(), z18.VnH()), "clz z13.h, p4/m, z18.h");
+ COMPARE(clz(z15.VnS(), p4.Merging(), z24.VnS()), "clz z15.s, p4/m, z24.s");
+ COMPARE(clz(z29.VnD(), p2.Merging(), z22.VnD()), "clz z29.d, p2/m, z22.d");
+ COMPARE(cnot(z16.VnB(), p6.Merging(), z20.VnB()), "cnot z16.b, p6/m, z20.b");
+ COMPARE(cnot(z10.VnH(), p5.Merging(), z12.VnH()), "cnot z10.h, p5/m, z12.h");
+ COMPARE(cnot(z8.VnS(), p5.Merging(), z21.VnS()), "cnot z8.s, p5/m, z21.s");
+ COMPARE(cnot(z3.VnD(), p3.Merging(), z18.VnD()), "cnot z3.d, p3/m, z18.d");
+ COMPARE(cnt(z29.VnB(), p3.Merging(), z7.VnB()), "cnt z29.b, p3/m, z7.b");
+ COMPARE(cnt(z3.VnH(), p6.Merging(), z31.VnH()), "cnt z3.h, p6/m, z31.h");
+ COMPARE(cnt(z2.VnS(), p4.Merging(), z16.VnS()), "cnt z2.s, p4/m, z16.s");
+ COMPARE(cnt(z0.VnD(), p0.Merging(), z24.VnD()), "cnt z0.d, p0/m, z24.d");
+ COMPARE(fabs(z17.VnH(), p7.Merging(), z15.VnH()), "fabs z17.h, p7/m, z15.h");
+ COMPARE(fabs(z18.VnS(), p0.Merging(), z29.VnS()), "fabs z18.s, p0/m, z29.s");
+ COMPARE(fabs(z17.VnD(), p1.Merging(), z9.VnD()), "fabs z17.d, p1/m, z9.d");
+ COMPARE(fneg(z25.VnH(), p1.Merging(), z28.VnH()), "fneg z25.h, p1/m, z28.h");
+ COMPARE(fneg(z5.VnS(), p1.Merging(), z25.VnS()), "fneg z5.s, p1/m, z25.s");
+ COMPARE(fneg(z6.VnD(), p1.Merging(), z17.VnD()), "fneg z6.d, p1/m, z17.d");
+ COMPARE(neg(z25.VnB(), p4.Merging(), z8.VnB()), "neg z25.b, p4/m, z8.b");
+ COMPARE(neg(z30.VnH(), p3.Merging(), z23.VnH()), "neg z30.h, p3/m, z23.h");
+ COMPARE(neg(z7.VnS(), p2.Merging(), z26.VnS()), "neg z7.s, p2/m, z26.s");
+ COMPARE(neg(z21.VnD(), p3.Merging(), z5.VnD()), "neg z21.d, p3/m, z5.d");
+ COMPARE(not_(z24.VnB(), p1.Merging(), z27.VnB()), "not z24.b, p1/m, z27.b");
+ COMPARE(not_(z31.VnH(), p6.Merging(), z19.VnH()), "not z31.h, p6/m, z19.h");
+ COMPARE(not_(z18.VnS(), p5.Merging(), z13.VnS()), "not z18.s, p5/m, z13.s");
+ COMPARE(not_(z12.VnD(), p2.Merging(), z28.VnD()), "not z12.d, p2/m, z28.d");
+ COMPARE(sxtb(z19.VnH(), p7.Merging(), z3.VnH()), "sxtb z19.h, p7/m, z3.h");
+ COMPARE(sxtb(z3.VnS(), p1.Merging(), z17.VnS()), "sxtb z3.s, p1/m, z17.s");
+ COMPARE(sxtb(z27.VnD(), p0.Merging(), z12.VnD()), "sxtb z27.d, p0/m, z12.d");
+ COMPARE(sxth(z6.VnS(), p1.Merging(), z17.VnS()), "sxth z6.s, p1/m, z17.s");
+ COMPARE(sxth(z8.VnD(), p6.Merging(), z2.VnD()), "sxth z8.d, p6/m, z2.d");
+ COMPARE(sxtw(z13.VnD(), p3.Merging(), z27.VnD()), "sxtw z13.d, p3/m, z27.d");
+ COMPARE(uxtb(z23.VnH(), p3.Merging(), z21.VnH()), "uxtb z23.h, p3/m, z21.h");
+ COMPARE(uxtb(z0.VnS(), p2.Merging(), z13.VnS()), "uxtb z0.s, p2/m, z13.s");
+ COMPARE(uxtb(z1.VnD(), p3.Merging(), z13.VnD()), "uxtb z1.d, p3/m, z13.d");
+ COMPARE(uxth(z27.VnS(), p0.Merging(), z29.VnS()), "uxth z27.s, p0/m, z29.s");
+ COMPARE(uxth(z22.VnD(), p4.Merging(), z20.VnD()), "uxth z22.d, p4/m, z20.d");
+ COMPARE(uxtw(z14.VnD(), p1.Merging(), z13.VnD()), "uxtw z14.d, p1/m, z13.d");
+
+ // Check related but undefined encodings.
+ COMPARE(dci(0x0410a000), "unallocated (Unallocated)"); // sxtb b
+ COMPARE(dci(0x0412a000), "unallocated (Unallocated)"); // sxth b
+ COMPARE(dci(0x0452a000), "unallocated (Unallocated)"); // sxth h
+ COMPARE(dci(0x0414a000), "unallocated (Unallocated)"); // sxtw b
+ COMPARE(dci(0x0454a000), "unallocated (Unallocated)"); // sxtw h
+ COMPARE(dci(0x0494a000), "unallocated (Unallocated)"); // sxtw s
+
+ COMPARE(dci(0x0411a000), "unallocated (Unallocated)"); // uxtb b
+ COMPARE(dci(0x0413a000), "unallocated (Unallocated)"); // uxth b
+ COMPARE(dci(0x0453a000), "unallocated (Unallocated)"); // uxth h
+ COMPARE(dci(0x0415a000), "unallocated (Unallocated)"); // uxtw b
+ COMPARE(dci(0x0455a000), "unallocated (Unallocated)"); // uxtw h
+ COMPARE(dci(0x0495a000), "unallocated (Unallocated)"); // uxtw s
+
+ COMPARE(dci(0x041ca000), "unallocated (Unallocated)"); // fabs b
+ COMPARE(dci(0x041da000), "unallocated (Unallocated)"); // fneg b
CLEANUP();
}
@@ -2994,22 +3033,22 @@ TEST(sve_neg_macro) {
TEST(sve_cpy_fcpy_imm) {
SETUP();
- COMPARE_PREFIX(cpy(z25.VnB(), p13.Zeroing(), -1), "mov z25.b, p13/z, #-1");
- COMPARE_PREFIX(cpy(z25.VnB(), p13.Merging(), -1), "mov z25.b, p13/m, #-1");
- COMPARE_PREFIX(cpy(z25.VnH(), p13.Merging(), 127), "mov z25.h, p13/m, #127");
- COMPARE_PREFIX(cpy(z25.VnS(), p13.Merging(), 10752),
- "mov z25.s, p13/m, #42, lsl #8");
- COMPARE_PREFIX(cpy(z25.VnD(), p13.Merging(), -10752),
- "mov z25.d, p13/m, #-42, lsl #8");
- COMPARE_PREFIX(mov(z25.VnD(), p13.Merging(), -10752),
- "mov z25.d, p13/m, #-42, lsl #8");
-
- COMPARE_PREFIX(fcpy(z20.VnH(), p11.Merging(), 29.0),
- "fmov z20.h, p11/m, #0x3d (29.0000)");
- COMPARE_PREFIX(fmov(z20.VnS(), p11.Merging(), -31.0),
- "fmov z20.s, p11/m, #0xbf (-31.0000)");
- COMPARE_PREFIX(fcpy(z20.VnD(), p11.Merging(), 1.0),
- "fmov z20.d, p11/m, #0x70 (1.0000)");
+ COMPARE(cpy(z25.VnB(), p13.Zeroing(), -1), "mov z25.b, p13/z, #-1");
+ COMPARE(cpy(z25.VnB(), p13.Merging(), -1), "mov z25.b, p13/m, #-1");
+ COMPARE(cpy(z25.VnH(), p13.Merging(), 127), "mov z25.h, p13/m, #127");
+ COMPARE(cpy(z25.VnS(), p13.Merging(), 10752),
+ "mov z25.s, p13/m, #42, lsl #8");
+ COMPARE(cpy(z25.VnD(), p13.Merging(), -10752),
+ "mov z25.d, p13/m, #-42, lsl #8");
+ COMPARE(mov(z25.VnD(), p13.Merging(), -10752),
+ "mov z25.d, p13/m, #-42, lsl #8");
+
+ COMPARE(fcpy(z20.VnH(), p11.Merging(), 29.0),
+ "fmov z20.h, p11/m, #0x3d (29.0000)");
+ COMPARE(fmov(z20.VnS(), p11.Merging(), -31.0),
+ "fmov z20.s, p11/m, #0xbf (-31.0000)");
+ COMPARE(fcpy(z20.VnD(), p11.Merging(), 1.0),
+ "fmov z20.d, p11/m, #0x70 (1.0000)");
CLEANUP();
}
@@ -3018,16 +3057,16 @@ TEST(sve_fmov_zero) {
SETUP();
// Predicated `fmov` is an alias for either `fcpy` or `cpy`.
- COMPARE_PREFIX(fmov(z13.VnS(), p0.Merging(), 1.0),
- "fmov z13.s, p0/m, #0x70 (1.0000)");
- COMPARE_PREFIX(fmov(z13.VnS(), p0.Merging(), 0.0), "mov z13.s, p0/m, #0");
+ COMPARE(fmov(z13.VnS(), p0.Merging(), 1.0),
+ "fmov z13.s, p0/m, #0x70 (1.0000)");
+ COMPARE(fmov(z13.VnS(), p0.Merging(), 0.0), "mov z13.s, p0/m, #0");
COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 1.0),
"fmov z13.d, p0/m, #0x70 (1.0000)");
COMPARE_MACRO(Fmov(z13.VnD(), p0.Merging(), 0.0), "mov z13.d, p0/m, #0");
// Unpredicated `fmov` is an alias for either `fdup` or `dup`.
- COMPARE_PREFIX(fmov(z13.VnS(), 1.0), "fmov z13.s, #0x70 (1.0000)");
- COMPARE_PREFIX(fmov(z13.VnS(), 0.0), "mov z13.s, #0");
+ COMPARE(fmov(z13.VnS(), 1.0), "fmov z13.s, #0x70 (1.0000)");
+ COMPARE(fmov(z13.VnS(), 0.0), "mov z13.s, #0");
COMPARE_MACRO(Fmov(z13.VnD(), 1.0), "fmov z13.d, #0x70 (1.0000)");
COMPARE_MACRO(Fmov(z13.VnD(), 0.0), "mov z13.d, #0");
@@ -3043,92 +3082,90 @@ TEST(sve_fmov_zero) {
TEST(sve_int_wide_imm_unpredicated) {
SETUP();
- COMPARE_PREFIX(add(z12.VnB(), z12.VnB(), 0), "add z12.b, z12.b, #0");
- COMPARE_PREFIX(add(z13.VnH(), z13.VnH(), 255), "add z13.h, z13.h, #255");
- COMPARE_PREFIX(add(z14.VnS(), z14.VnS(), 256),
- "add z14.s, z14.s, #1, lsl #8");
- COMPARE_PREFIX(add(z15.VnD(), z15.VnD(), 255 * 256),
- "add z15.d, z15.d, #255, lsl #8");
-
- COMPARE_PREFIX(dup(z6.VnB(), -128), "mov z6.b, #-128");
- COMPARE_PREFIX(dup(z7.VnH(), 127), "mov z7.h, #127");
- COMPARE_PREFIX(dup(z8.VnS(), -128 * 256), "mov z8.s, #-128, lsl #8");
- COMPARE_PREFIX(dup(z9.VnD(), 127 * 256), "mov z9.d, #127, lsl #8");
- COMPARE_PREFIX(mov(z8.VnS(), -128 * 256, -1), "mov z8.s, #-128, lsl #8");
- COMPARE_PREFIX(mov(z9.VnD(), 127 * 256, -1), "mov z9.d, #127, lsl #8");
-
- COMPARE_PREFIX(sqadd(z7.VnB(), z7.VnB(), 124), "sqadd z7.b, z7.b, #124");
- COMPARE_PREFIX(sqadd(z8.VnH(), z8.VnH(), 131), "sqadd z8.h, z8.h, #131");
- COMPARE_PREFIX(sqadd(z9.VnS(), z9.VnS(), 252 * 256),
- "sqadd z9.s, z9.s, #252, lsl #8");
- COMPARE_PREFIX(sqadd(z10.VnD(), z10.VnD(), 20 * 256),
- "sqadd z10.d, z10.d, #20, lsl #8");
-
- COMPARE_PREFIX(sqsub(z31.VnB(), z31.VnB(), 132), "sqsub z31.b, z31.b, #132");
- COMPARE_PREFIX(sqsub(z30.VnH(), z30.VnH(), 251), "sqsub z30.h, z30.h, #251");
- COMPARE_PREFIX(sqsub(z29.VnS(), z29.VnS(), 21 * 256),
- "sqsub z29.s, z29.s, #21, lsl #8");
- COMPARE_PREFIX(sqsub(z28.VnD(), z28.VnD(), 123 * 256),
- "sqsub z28.d, z28.d, #123, lsl #8");
-
- COMPARE_PREFIX(subr(z20.VnB(), z20.VnB(), 250), "subr z20.b, z20.b, #250");
- COMPARE_PREFIX(subr(z21.VnH(), z21.VnH(), 22), "subr z21.h, z21.h, #22");
- COMPARE_PREFIX(subr(z22.VnS(), z22.VnS(), 122 * 256),
- "subr z22.s, z22.s, #122, lsl #8");
- COMPARE_PREFIX(subr(z23.VnD(), z23.VnD(), 133 * 256),
- "subr z23.d, z23.d, #133, lsl #8");
-
- COMPARE_PREFIX(sub(z18.VnB(), z18.VnB(), 23), "sub z18.b, z18.b, #23");
- COMPARE_PREFIX(sub(z19.VnH(), z19.VnH(), 121), "sub z19.h, z19.h, #121");
- COMPARE_PREFIX(sub(z20.VnS(), z20.VnS(), 134 * 256),
- "sub z20.s, z20.s, #134, lsl #8");
- COMPARE_PREFIX(sub(z21.VnD(), z21.VnD(), 249 * 256),
- "sub z21.d, z21.d, #249, lsl #8");
-
- COMPARE_PREFIX(uqadd(z21.VnB(), z21.VnB(), 246), "uqadd z21.b, z21.b, #246");
- COMPARE_PREFIX(uqadd(z22.VnH(), z22.VnH(), 26), "uqadd z22.h, z22.h, #26");
- COMPARE_PREFIX(uqadd(z23.VnS(), z23.VnS(), 118 * 256),
- "uqadd z23.s, z23.s, #118, lsl #8");
- COMPARE_PREFIX(uqadd(z24.VnD(), z24.VnD(), 137 * 256),
- "uqadd z24.d, z24.d, #137, lsl #8");
-
- COMPARE_PREFIX(uqsub(z10.VnB(), z10.VnB(), 27), "uqsub z10.b, z10.b, #27");
- COMPARE_PREFIX(uqsub(z11.VnH(), z11.VnH(), 117), "uqsub z11.h, z11.h, #117");
- COMPARE_PREFIX(uqsub(z12.VnS(), z12.VnS(), 138 * 256),
- "uqsub z12.s, z12.s, #138, lsl #8");
- COMPARE_PREFIX(uqsub(z13.VnD(), z13.VnD(), 245 * 256),
- "uqsub z13.d, z13.d, #245, lsl #8");
-
- COMPARE_PREFIX(fdup(z26.VnH(), Float16(-5.0f)),
- "fmov z26.h, #0x94 (-5.0000)");
- COMPARE_PREFIX(fdup(z27.VnS(), -13.0f), "fmov z27.s, #0xaa (-13.0000)");
- COMPARE_PREFIX(fdup(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)");
- COMPARE_PREFIX(fmov(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)");
-
- COMPARE_PREFIX(mul(z15.VnB(), z15.VnB(), -128), "mul z15.b, z15.b, #-128");
- COMPARE_PREFIX(mul(z16.VnH(), z16.VnH(), -1), "mul z16.h, z16.h, #-1");
- COMPARE_PREFIX(mul(z17.VnS(), z17.VnS(), 17), "mul z17.s, z17.s, #17");
- COMPARE_PREFIX(mul(z18.VnD(), z18.VnD(), 127), "mul z18.d, z18.d, #127");
-
- COMPARE_PREFIX(smax(z7.VnB(), z7.VnB(), -2), "smax z7.b, z7.b, #-2");
- COMPARE_PREFIX(smax(z8.VnH(), z8.VnH(), 18), "smax z8.h, z8.h, #18");
- COMPARE_PREFIX(smax(z9.VnS(), z9.VnS(), 126), "smax z9.s, z9.s, #126");
- COMPARE_PREFIX(smax(z10.VnD(), z10.VnD(), -127), "smax z10.d, z10.d, #-127");
-
- COMPARE_PREFIX(smin(z5.VnB(), z5.VnB(), 19), "smin z5.b, z5.b, #19");
- COMPARE_PREFIX(smin(z6.VnH(), z6.VnH(), 125), "smin z6.h, z6.h, #125");
- COMPARE_PREFIX(smin(z7.VnS(), z7.VnS(), -126), "smin z7.s, z7.s, #-126");
- COMPARE_PREFIX(smin(z8.VnD(), z8.VnD(), -3), "smin z8.d, z8.d, #-3");
-
- COMPARE_PREFIX(umax(z15.VnB(), z15.VnB(), 120), "umax z15.b, z15.b, #120");
- COMPARE_PREFIX(umax(z16.VnH(), z16.VnH(), 135), "umax z16.h, z16.h, #135");
- COMPARE_PREFIX(umax(z17.VnS(), z17.VnS(), 248), "umax z17.s, z17.s, #248");
- COMPARE_PREFIX(umax(z18.VnD(), z18.VnD(), 24), "umax z18.d, z18.d, #24");
-
- COMPARE_PREFIX(umin(z22.VnB(), z22.VnB(), 136), "umin z22.b, z22.b, #136");
- COMPARE_PREFIX(umin(z23.VnH(), z23.VnH(), 247), "umin z23.h, z23.h, #247");
- COMPARE_PREFIX(umin(z24.VnS(), z24.VnS(), 25), "umin z24.s, z24.s, #25");
- COMPARE_PREFIX(umin(z25.VnD(), z25.VnD(), 119), "umin z25.d, z25.d, #119");
+ COMPARE(add(z12.VnB(), z12.VnB(), 0), "add z12.b, z12.b, #0");
+ COMPARE(add(z13.VnH(), z13.VnH(), 255), "add z13.h, z13.h, #255");
+ COMPARE(add(z14.VnS(), z14.VnS(), 256), "add z14.s, z14.s, #1, lsl #8");
+ COMPARE(add(z15.VnD(), z15.VnD(), 255 * 256),
+ "add z15.d, z15.d, #255, lsl #8");
+
+ COMPARE(dup(z6.VnB(), -128), "mov z6.b, #-128");
+ COMPARE(dup(z7.VnH(), 127), "mov z7.h, #127");
+ COMPARE(dup(z8.VnS(), -128 * 256), "mov z8.s, #-128, lsl #8");
+ COMPARE(dup(z9.VnD(), 127 * 256), "mov z9.d, #127, lsl #8");
+ COMPARE(mov(z8.VnS(), -128 * 256, -1), "mov z8.s, #-128, lsl #8");
+ COMPARE(mov(z9.VnD(), 127 * 256, -1), "mov z9.d, #127, lsl #8");
+
+ COMPARE(sqadd(z7.VnB(), z7.VnB(), 124), "sqadd z7.b, z7.b, #124");
+ COMPARE(sqadd(z8.VnH(), z8.VnH(), 131), "sqadd z8.h, z8.h, #131");
+ COMPARE(sqadd(z9.VnS(), z9.VnS(), 252 * 256),
+ "sqadd z9.s, z9.s, #252, lsl #8");
+ COMPARE(sqadd(z10.VnD(), z10.VnD(), 20 * 256),
+ "sqadd z10.d, z10.d, #20, lsl #8");
+
+ COMPARE(sqsub(z31.VnB(), z31.VnB(), 132), "sqsub z31.b, z31.b, #132");
+ COMPARE(sqsub(z30.VnH(), z30.VnH(), 251), "sqsub z30.h, z30.h, #251");
+ COMPARE(sqsub(z29.VnS(), z29.VnS(), 21 * 256),
+ "sqsub z29.s, z29.s, #21, lsl #8");
+ COMPARE(sqsub(z28.VnD(), z28.VnD(), 123 * 256),
+ "sqsub z28.d, z28.d, #123, lsl #8");
+
+ COMPARE(subr(z20.VnB(), z20.VnB(), 250), "subr z20.b, z20.b, #250");
+ COMPARE(subr(z21.VnH(), z21.VnH(), 22), "subr z21.h, z21.h, #22");
+ COMPARE(subr(z22.VnS(), z22.VnS(), 122 * 256),
+ "subr z22.s, z22.s, #122, lsl #8");
+ COMPARE(subr(z23.VnD(), z23.VnD(), 133 * 256),
+ "subr z23.d, z23.d, #133, lsl #8");
+
+ COMPARE(sub(z18.VnB(), z18.VnB(), 23), "sub z18.b, z18.b, #23");
+ COMPARE(sub(z19.VnH(), z19.VnH(), 121), "sub z19.h, z19.h, #121");
+ COMPARE(sub(z20.VnS(), z20.VnS(), 134 * 256),
+ "sub z20.s, z20.s, #134, lsl #8");
+ COMPARE(sub(z21.VnD(), z21.VnD(), 249 * 256),
+ "sub z21.d, z21.d, #249, lsl #8");
+
+ COMPARE(uqadd(z21.VnB(), z21.VnB(), 246), "uqadd z21.b, z21.b, #246");
+ COMPARE(uqadd(z22.VnH(), z22.VnH(), 26), "uqadd z22.h, z22.h, #26");
+ COMPARE(uqadd(z23.VnS(), z23.VnS(), 118 * 256),
+ "uqadd z23.s, z23.s, #118, lsl #8");
+ COMPARE(uqadd(z24.VnD(), z24.VnD(), 137 * 256),
+ "uqadd z24.d, z24.d, #137, lsl #8");
+
+ COMPARE(uqsub(z10.VnB(), z10.VnB(), 27), "uqsub z10.b, z10.b, #27");
+ COMPARE(uqsub(z11.VnH(), z11.VnH(), 117), "uqsub z11.h, z11.h, #117");
+ COMPARE(uqsub(z12.VnS(), z12.VnS(), 138 * 256),
+ "uqsub z12.s, z12.s, #138, lsl #8");
+ COMPARE(uqsub(z13.VnD(), z13.VnD(), 245 * 256),
+ "uqsub z13.d, z13.d, #245, lsl #8");
+
+ COMPARE(fdup(z26.VnH(), Float16(-5.0f)), "fmov z26.h, #0x94 (-5.0000)");
+ COMPARE(fdup(z27.VnS(), -13.0f), "fmov z27.s, #0xaa (-13.0000)");
+ COMPARE(fdup(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)");
+ COMPARE(fmov(z28.VnD(), 1.0f), "fmov z28.d, #0x70 (1.0000)");
+
+ COMPARE(mul(z15.VnB(), z15.VnB(), -128), "mul z15.b, z15.b, #-128");
+ COMPARE(mul(z16.VnH(), z16.VnH(), -1), "mul z16.h, z16.h, #-1");
+ COMPARE(mul(z17.VnS(), z17.VnS(), 17), "mul z17.s, z17.s, #17");
+ COMPARE(mul(z18.VnD(), z18.VnD(), 127), "mul z18.d, z18.d, #127");
+
+ COMPARE(smax(z7.VnB(), z7.VnB(), -2), "smax z7.b, z7.b, #-2");
+ COMPARE(smax(z8.VnH(), z8.VnH(), 18), "smax z8.h, z8.h, #18");
+ COMPARE(smax(z9.VnS(), z9.VnS(), 126), "smax z9.s, z9.s, #126");
+ COMPARE(smax(z10.VnD(), z10.VnD(), -127), "smax z10.d, z10.d, #-127");
+
+ COMPARE(smin(z5.VnB(), z5.VnB(), 19), "smin z5.b, z5.b, #19");
+ COMPARE(smin(z6.VnH(), z6.VnH(), 125), "smin z6.h, z6.h, #125");
+ COMPARE(smin(z7.VnS(), z7.VnS(), -126), "smin z7.s, z7.s, #-126");
+ COMPARE(smin(z8.VnD(), z8.VnD(), -3), "smin z8.d, z8.d, #-3");
+
+ COMPARE(umax(z15.VnB(), z15.VnB(), 120), "umax z15.b, z15.b, #120");
+ COMPARE(umax(z16.VnH(), z16.VnH(), 135), "umax z16.h, z16.h, #135");
+ COMPARE(umax(z17.VnS(), z17.VnS(), 248), "umax z17.s, z17.s, #248");
+ COMPARE(umax(z18.VnD(), z18.VnD(), 24), "umax z18.d, z18.d, #24");
+
+ COMPARE(umin(z22.VnB(), z22.VnB(), 136), "umin z22.b, z22.b, #136");
+ COMPARE(umin(z23.VnH(), z23.VnH(), 247), "umin z23.h, z23.h, #247");
+ COMPARE(umin(z24.VnS(), z24.VnS(), 25), "umin z24.s, z24.s, #25");
+ COMPARE(umin(z25.VnD(), z25.VnD(), 119), "umin z25.d, z25.d, #119");
CLEANUP();
}
@@ -3367,120 +3404,90 @@ TEST(sve_mem_32bit_gather_and_unsized_contiguous) {
SETUP();
// 32-bit gather load in scalar-plus-vector vform with unscaled offset.
- COMPARE_PREFIX(ld1b(z9.VnS(),
- p5.Zeroing(),
- SVEMemOperand(x2, z1.VnS(), SXTW)),
- "ld1b {z9.s}, p5/z, [x2, z1.s, sxtw]");
- COMPARE_PREFIX(ld1b(z9.VnS(),
- p5.Zeroing(),
- SVEMemOperand(sp, z1.VnS(), UXTW)),
- "ld1b {z9.s}, p5/z, [sp, z1.s, uxtw]");
- COMPARE_PREFIX(ld1h(z17.VnS(),
- p2.Zeroing(),
- SVEMemOperand(x11, z24.VnS(), SXTW)),
- "ld1h {z17.s}, p2/z, [x11, z24.s, sxtw]");
- COMPARE_PREFIX(ld1w(z22.VnS(),
- p6.Zeroing(),
- SVEMemOperand(sp, z5.VnS(), UXTW)),
- "ld1w {z22.s}, p6/z, [sp, z5.s, uxtw]");
- COMPARE_PREFIX(ld1sb(z12.VnS(),
- p7.Zeroing(),
- SVEMemOperand(x17, z23.VnS(), UXTW)),
- "ld1sb {z12.s}, p7/z, [x17, z23.s, uxtw]");
- COMPARE_PREFIX(ld1sb(z22.VnS(),
- p3.Zeroing(),
- SVEMemOperand(x23, z23.VnS(), SXTW)),
- "ld1sb {z22.s}, p3/z, [x23, z23.s, sxtw]");
- COMPARE_PREFIX(ld1sh(z11.VnS(),
- p2.Zeroing(),
- SVEMemOperand(x18, z10.VnS(), UXTW)),
- "ld1sh {z11.s}, p2/z, [x18, z10.s, uxtw]");
+ COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(x2, z1.VnS(), SXTW)),
+ "ld1b {z9.s}, p5/z, [x2, z1.s, sxtw]");
+ COMPARE(ld1b(z9.VnS(), p5.Zeroing(), SVEMemOperand(sp, z1.VnS(), UXTW)),
+ "ld1b {z9.s}, p5/z, [sp, z1.s, uxtw]");
+ COMPARE(ld1h(z17.VnS(), p2.Zeroing(), SVEMemOperand(x11, z24.VnS(), SXTW)),
+ "ld1h {z17.s}, p2/z, [x11, z24.s, sxtw]");
+ COMPARE(ld1w(z22.VnS(), p6.Zeroing(), SVEMemOperand(sp, z5.VnS(), UXTW)),
+ "ld1w {z22.s}, p6/z, [sp, z5.s, uxtw]");
+ COMPARE(ld1sb(z12.VnS(), p7.Zeroing(), SVEMemOperand(x17, z23.VnS(), UXTW)),
+ "ld1sb {z12.s}, p7/z, [x17, z23.s, uxtw]");
+ COMPARE(ld1sb(z22.VnS(), p3.Zeroing(), SVEMemOperand(x23, z23.VnS(), SXTW)),
+ "ld1sb {z22.s}, p3/z, [x23, z23.s, sxtw]");
+ COMPARE(ld1sh(z11.VnS(), p2.Zeroing(), SVEMemOperand(x18, z10.VnS(), UXTW)),
+ "ld1sh {z11.s}, p2/z, [x18, z10.s, uxtw]");
// 32-bit gather load in scalar-plus-vector vform with scaled offset.
- COMPARE_PREFIX(ld1h(z9.VnS(),
- p3.Zeroing(),
- SVEMemOperand(sp, z4.VnS(), UXTW, 1)),
- "ld1h {z9.s}, p3/z, [sp, z4.s, uxtw #1]");
- COMPARE_PREFIX(ld1w(z0.VnS(),
- p6.Zeroing(),
- SVEMemOperand(x28, z21.VnS(), SXTW, 2)),
- "ld1w {z0.s}, p6/z, [x28, z21.s, sxtw #2]");
- COMPARE_PREFIX(ld1sh(z11.VnS(),
- p4.Zeroing(),
- SVEMemOperand(sp, z0.VnS(), SXTW, 1)),
- "ld1sh {z11.s}, p4/z, [sp, z0.s, sxtw #1]");
+ COMPARE(ld1h(z9.VnS(), p3.Zeroing(), SVEMemOperand(sp, z4.VnS(), UXTW, 1)),
+ "ld1h {z9.s}, p3/z, [sp, z4.s, uxtw #1]");
+ COMPARE(ld1w(z0.VnS(), p6.Zeroing(), SVEMemOperand(x28, z21.VnS(), SXTW, 2)),
+ "ld1w {z0.s}, p6/z, [x28, z21.s, sxtw #2]");
+ COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(sp, z0.VnS(), SXTW, 1)),
+ "ld1sh {z11.s}, p4/z, [sp, z0.s, sxtw #1]");
// 32-bit gather first-fault load in scalar-plus-vector vform with 32-bit
// unpacked unscaled offset.
- COMPARE_PREFIX(ldff1b(z18.VnS(),
- p6.Zeroing(),
- SVEMemOperand(x27, z24.VnS(), UXTW)),
- "ldff1b {z18.s}, p6/z, [x27, z24.s, uxtw]");
- COMPARE_PREFIX(ldff1h(z28.VnS(),
- p6.Zeroing(),
- SVEMemOperand(x1, z30.VnS(), UXTW)),
- "ldff1h {z28.s}, p6/z, [x1, z30.s, uxtw]");
- COMPARE_PREFIX(ldff1w(z12.VnS(),
- p3.Zeroing(),
- SVEMemOperand(x25, z27.VnS(), SXTW)),
- "ldff1w {z12.s}, p3/z, [x25, z27.s, sxtw]");
- COMPARE_PREFIX(ldff1sb(z15.VnS(),
- p5.Zeroing(),
- SVEMemOperand(x5, z14.VnS(), SXTW)),
- "ldff1sb {z15.s}, p5/z, [x5, z14.s, sxtw]");
- COMPARE_PREFIX(ldff1sh(z18.VnS(),
- p4.Zeroing(),
- SVEMemOperand(x25, z25.VnS(), SXTW)),
- "ldff1sh {z18.s}, p4/z, [x25, z25.s, sxtw]");
+ COMPARE(ldff1b(z18.VnS(), p6.Zeroing(), SVEMemOperand(x27, z24.VnS(), UXTW)),
+ "ldff1b {z18.s}, p6/z, [x27, z24.s, uxtw]");
+ COMPARE(ldff1h(z28.VnS(), p6.Zeroing(), SVEMemOperand(x1, z30.VnS(), UXTW)),
+ "ldff1h {z28.s}, p6/z, [x1, z30.s, uxtw]");
+ COMPARE(ldff1w(z12.VnS(), p3.Zeroing(), SVEMemOperand(x25, z27.VnS(), SXTW)),
+ "ldff1w {z12.s}, p3/z, [x25, z27.s, sxtw]");
+ COMPARE(ldff1sb(z15.VnS(), p5.Zeroing(), SVEMemOperand(x5, z14.VnS(), SXTW)),
+ "ldff1sb {z15.s}, p5/z, [x5, z14.s, sxtw]");
+ COMPARE(ldff1sh(z18.VnS(), p4.Zeroing(), SVEMemOperand(x25, z25.VnS(), SXTW)),
+ "ldff1sh {z18.s}, p4/z, [x25, z25.s, sxtw]");
// 32-bit gather first-fault load in scalar-plus-vector vform with 32-bit
// scaled offset.
- COMPARE_PREFIX(ldff1h(z25.VnS(),
- p3.Zeroing(),
- SVEMemOperand(x17, z15.VnS(), SXTW, 1)),
- "ldff1h {z25.s}, p3/z, [x17, z15.s, sxtw #1]");
- COMPARE_PREFIX(ldff1w(z5.VnS(),
- p4.Zeroing(),
- SVEMemOperand(x23, z31.VnS(), UXTW, 2)),
- "ldff1w {z5.s}, p4/z, [x23, z31.s, uxtw #2]");
- COMPARE_PREFIX(ldff1sh(z10.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19, z15.VnS(), UXTW, 1)),
- "ldff1sh {z10.s}, p0/z, [x19, z15.s, uxtw #1]");
+ COMPARE(ldff1h(z25.VnS(),
+ p3.Zeroing(),
+ SVEMemOperand(x17, z15.VnS(), SXTW, 1)),
+ "ldff1h {z25.s}, p3/z, [x17, z15.s, sxtw #1]");
+ COMPARE(ldff1w(z5.VnS(),
+ p4.Zeroing(),
+ SVEMemOperand(x23, z31.VnS(), UXTW, 2)),
+ "ldff1w {z5.s}, p4/z, [x23, z31.s, uxtw #2]");
+ COMPARE(ldff1sh(z10.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, z15.VnS(), UXTW, 1)),
+ "ldff1sh {z10.s}, p0/z, [x19, z15.s, uxtw #1]");
// Load and broadcast data to vector.
- COMPARE_PREFIX(ld1rb(z2.VnH(), p0.Zeroing(), SVEMemOperand(x30, 0)),
- "ld1rb {z2.h}, p0/z, [x30]");
- COMPARE_PREFIX(ld1rb(z14.VnS(), p2.Zeroing(), SVEMemOperand(x11, 63)),
- "ld1rb {z14.s}, p2/z, [x11, #63]");
- COMPARE_PREFIX(ld1rb(z27.VnD(), p1.Zeroing(), SVEMemOperand(x29, 2)),
- "ld1rb {z27.d}, p1/z, [x29, #2]");
- COMPARE_PREFIX(ld1rb(z0.VnB(), p3.Zeroing(), SVEMemOperand(sp, 59)),
- "ld1rb {z0.b}, p3/z, [sp, #59]");
- COMPARE_PREFIX(ld1rh(z19.VnH(), p5.Zeroing(), SVEMemOperand(x1, 0)),
- "ld1rh {z19.h}, p5/z, [x1]");
- COMPARE_PREFIX(ld1rh(z4.VnS(), p7.Zeroing(), SVEMemOperand(x29, 126)),
- "ld1rh {z4.s}, p7/z, [x29, #126]");
- COMPARE_PREFIX(ld1rh(z24.VnD(), p0.Zeroing(), SVEMemOperand(sp, 78)),
- "ld1rh {z24.d}, p0/z, [sp, #78]");
- COMPARE_PREFIX(ld1rw(z19.VnS(), p5.Zeroing(), SVEMemOperand(x4, 252)),
- "ld1rw {z19.s}, p5/z, [x4, #252]");
- COMPARE_PREFIX(ld1rw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x2, 100)),
- "ld1rw {z13.d}, p3/z, [x2, #100]");
- COMPARE_PREFIX(ld1rd(z19.VnD(), p7.Zeroing(), SVEMemOperand(x14, 504)),
- "ld1rd {z19.d}, p7/z, [x14, #504]");
- COMPARE_PREFIX(ld1rsb(z16.VnH(), p1.Zeroing(), SVEMemOperand(x29, 0)),
- "ld1rsb {z16.h}, p1/z, [x29]");
- COMPARE_PREFIX(ld1rsb(z8.VnS(), p6.Zeroing(), SVEMemOperand(sp, 33)),
- "ld1rsb {z8.s}, p6/z, [sp, #33]");
- COMPARE_PREFIX(ld1rsb(z25.VnD(), p2.Zeroing(), SVEMemOperand(x18, 63)),
- "ld1rsb {z25.d}, p2/z, [x18, #63]");
- COMPARE_PREFIX(ld1rsh(z11.VnS(), p5.Zeroing(), SVEMemOperand(x14, 2)),
- "ld1rsh {z11.s}, p5/z, [x14, #2]");
- COMPARE_PREFIX(ld1rsh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x19, 124)),
- "ld1rsh {z28.d}, p1/z, [x19, #124]");
- COMPARE_PREFIX(ld1rsw(z23.VnD(), p4.Zeroing(), SVEMemOperand(x10, 8)),
- "ld1rsw {z23.d}, p4/z, [x10, #8]");
+ COMPARE(ld1rb(z2.VnH(), p0.Zeroing(), SVEMemOperand(x30, 0)),
+ "ld1rb {z2.h}, p0/z, [x30]");
+ COMPARE(ld1rb(z14.VnS(), p2.Zeroing(), SVEMemOperand(x11, 63)),
+ "ld1rb {z14.s}, p2/z, [x11, #63]");
+ COMPARE(ld1rb(z27.VnD(), p1.Zeroing(), SVEMemOperand(x29, 2)),
+ "ld1rb {z27.d}, p1/z, [x29, #2]");
+ COMPARE(ld1rb(z0.VnB(), p3.Zeroing(), SVEMemOperand(sp, 59)),
+ "ld1rb {z0.b}, p3/z, [sp, #59]");
+ COMPARE(ld1rh(z19.VnH(), p5.Zeroing(), SVEMemOperand(x1, 0)),
+ "ld1rh {z19.h}, p5/z, [x1]");
+ COMPARE(ld1rh(z4.VnS(), p7.Zeroing(), SVEMemOperand(x29, 126)),
+ "ld1rh {z4.s}, p7/z, [x29, #126]");
+ COMPARE(ld1rh(z24.VnD(), p0.Zeroing(), SVEMemOperand(sp, 78)),
+ "ld1rh {z24.d}, p0/z, [sp, #78]");
+ COMPARE(ld1rw(z19.VnS(), p5.Zeroing(), SVEMemOperand(x4, 252)),
+ "ld1rw {z19.s}, p5/z, [x4, #252]");
+ COMPARE(ld1rw(z13.VnD(), p3.Zeroing(), SVEMemOperand(x2, 100)),
+ "ld1rw {z13.d}, p3/z, [x2, #100]");
+ COMPARE(ld1rd(z19.VnD(), p7.Zeroing(), SVEMemOperand(x14, 504)),
+ "ld1rd {z19.d}, p7/z, [x14, #504]");
+ COMPARE(ld1rsb(z16.VnH(), p1.Zeroing(), SVEMemOperand(x29, 0)),
+ "ld1rsb {z16.h}, p1/z, [x29]");
+ COMPARE(ld1rsb(z8.VnS(), p6.Zeroing(), SVEMemOperand(sp, 33)),
+ "ld1rsb {z8.s}, p6/z, [sp, #33]");
+ COMPARE(ld1rsb(z25.VnD(), p2.Zeroing(), SVEMemOperand(x18, 63)),
+ "ld1rsb {z25.d}, p2/z, [x18, #63]");
+ COMPARE(ld1rsh(z11.VnS(), p5.Zeroing(), SVEMemOperand(x14, 2)),
+ "ld1rsh {z11.s}, p5/z, [x14, #2]");
+ COMPARE(ld1rsh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x19, 124)),
+ "ld1rsh {z28.d}, p1/z, [x19, #124]");
+ COMPARE(ld1rsw(z23.VnD(), p4.Zeroing(), SVEMemOperand(x10, 8)),
+ "ld1rsw {z23.d}, p4/z, [x10, #8]");
CLEANUP();
}
@@ -3514,34 +3521,34 @@ TEST(sve_mem_32bit_gather_and_unsized_contiguous_macro) {
TEST(sve_mem_64bit_gather_vector_plus_immediate) {
SETUP();
- COMPARE_PREFIX(ld1b(z2.VnD(), p2.Zeroing(), SVEMemOperand(z12.VnD(), 31)),
- "ld1b {z2.d}, p2/z, [z12.d, #31]");
- COMPARE_PREFIX(ld1h(z30.VnD(), p7.Zeroing(), SVEMemOperand(z28.VnD(), 10)),
- "ld1h {z30.d}, p7/z, [z28.d, #10]");
- COMPARE_PREFIX(ld1w(z10.VnD(), p5.Zeroing(), SVEMemOperand(z4.VnD(), 124)),
- "ld1w {z10.d}, p5/z, [z4.d, #124]");
- COMPARE_PREFIX(ld1d(z13.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)),
- "ld1d {z13.d}, p3/z, [z19.d, #248]");
- COMPARE_PREFIX(ld1sb(z16.VnD(), p7.Zeroing(), SVEMemOperand(z31.VnD())),
- "ld1sb {z16.d}, p7/z, [z31.d]");
- COMPARE_PREFIX(ld1sh(z20.VnD(), p2.Zeroing(), SVEMemOperand(z2.VnD(), 62)),
- "ld1sh {z20.d}, p2/z, [z2.d, #62]");
- COMPARE_PREFIX(ld1sw(z2.VnD(), p7.Zeroing(), SVEMemOperand(z25.VnD())),
- "ld1sw {z2.d}, p7/z, [z25.d]");
- COMPARE_PREFIX(ldff1b(z24.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), 0)),
- "ldff1b {z24.d}, p5/z, [z8.d]");
- COMPARE_PREFIX(ldff1h(z9.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD())),
- "ldff1h {z9.d}, p3/z, [z19.d]");
- COMPARE_PREFIX(ldff1w(z26.VnD(), p6.Zeroing(), SVEMemOperand(z15.VnD(), 4)),
- "ldff1w {z26.d}, p6/z, [z15.d, #4]");
- COMPARE_PREFIX(ldff1d(z19.VnD(), p1.Zeroing(), SVEMemOperand(z14.VnD())),
- "ldff1d {z19.d}, p1/z, [z14.d]");
- COMPARE_PREFIX(ldff1sb(z26.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD(), 21)),
- "ldff1sb {z26.d}, p5/z, [z14.d, #21]");
- COMPARE_PREFIX(ldff1sh(z6.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 42)),
- "ldff1sh {z6.d}, p3/z, [z19.d, #42]");
- COMPARE_PREFIX(ldff1sw(z19.VnD(), p7.Zeroing(), SVEMemOperand(z14.VnD(), 84)),
- "ldff1sw {z19.d}, p7/z, [z14.d, #84]");
+ COMPARE(ld1b(z2.VnD(), p2.Zeroing(), SVEMemOperand(z12.VnD(), 31)),
+ "ld1b {z2.d}, p2/z, [z12.d, #31]");
+ COMPARE(ld1h(z30.VnD(), p7.Zeroing(), SVEMemOperand(z28.VnD(), 10)),
+ "ld1h {z30.d}, p7/z, [z28.d, #10]");
+ COMPARE(ld1w(z10.VnD(), p5.Zeroing(), SVEMemOperand(z4.VnD(), 124)),
+ "ld1w {z10.d}, p5/z, [z4.d, #124]");
+ COMPARE(ld1d(z13.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 248)),
+ "ld1d {z13.d}, p3/z, [z19.d, #248]");
+ COMPARE(ld1sb(z16.VnD(), p7.Zeroing(), SVEMemOperand(z31.VnD())),
+ "ld1sb {z16.d}, p7/z, [z31.d]");
+ COMPARE(ld1sh(z20.VnD(), p2.Zeroing(), SVEMemOperand(z2.VnD(), 62)),
+ "ld1sh {z20.d}, p2/z, [z2.d, #62]");
+ COMPARE(ld1sw(z2.VnD(), p7.Zeroing(), SVEMemOperand(z25.VnD())),
+ "ld1sw {z2.d}, p7/z, [z25.d]");
+ COMPARE(ldff1b(z24.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), 0)),
+ "ldff1b {z24.d}, p5/z, [z8.d]");
+ COMPARE(ldff1h(z9.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD())),
+ "ldff1h {z9.d}, p3/z, [z19.d]");
+ COMPARE(ldff1w(z26.VnD(), p6.Zeroing(), SVEMemOperand(z15.VnD(), 4)),
+ "ldff1w {z26.d}, p6/z, [z15.d, #4]");
+ COMPARE(ldff1d(z19.VnD(), p1.Zeroing(), SVEMemOperand(z14.VnD())),
+ "ldff1d {z19.d}, p1/z, [z14.d]");
+ COMPARE(ldff1sb(z26.VnD(), p5.Zeroing(), SVEMemOperand(z14.VnD(), 21)),
+ "ldff1sb {z26.d}, p5/z, [z14.d, #21]");
+ COMPARE(ldff1sh(z6.VnD(), p3.Zeroing(), SVEMemOperand(z19.VnD(), 42)),
+ "ldff1sh {z6.d}, p3/z, [z19.d, #42]");
+ COMPARE(ldff1sw(z19.VnD(), p7.Zeroing(), SVEMemOperand(z14.VnD(), 84)),
+ "ldff1sw {z19.d}, p7/z, [z14.d, #84]");
CLEANUP();
}
@@ -3629,91 +3636,57 @@ TEST(sve_mem_64bit_gather_vector_plus_immediate_macro) {
TEST(sve_mem_64bit_gather_scalar_plus_vector) {
SETUP();
- COMPARE_PREFIX(ld1b(z30.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD())),
- "ld1b {z30.d}, p6/z, [sp, z24.d]");
- COMPARE_PREFIX(ld1d(z18.VnD(), p5.Zeroing(), SVEMemOperand(x11, z11.VnD())),
- "ld1d {z18.d}, p5/z, [x11, z11.d]");
- COMPARE_PREFIX(ld1h(z2.VnD(), p3.Zeroing(), SVEMemOperand(x16, z18.VnD())),
- "ld1h {z2.d}, p3/z, [x16, z18.d]");
- COMPARE_PREFIX(ld1sb(z11.VnD(), p3.Zeroing(), SVEMemOperand(x24, z21.VnD())),
- "ld1sb {z11.d}, p3/z, [x24, z21.d]");
- COMPARE_PREFIX(ld1sh(z7.VnD(), p7.Zeroing(), SVEMemOperand(x28, z23.VnD())),
- "ld1sh {z7.d}, p7/z, [x28, z23.d]");
- COMPARE_PREFIX(ld1sw(z29.VnD(), p7.Zeroing(), SVEMemOperand(x27, z4.VnD())),
- "ld1sw {z29.d}, p7/z, [x27, z4.d]");
- COMPARE_PREFIX(ld1w(z19.VnD(), p1.Zeroing(), SVEMemOperand(x27, z4.VnD())),
- "ld1w {z19.d}, p1/z, [x27, z4.d]");
-
- COMPARE_PREFIX(ld1d(z20.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x3, z15.VnD(), LSL, 3)),
- "ld1d {z20.d}, p3/z, [x3, z15.d, lsl #3]");
- COMPARE_PREFIX(ld1h(z24.VnD(),
- p4.Zeroing(),
- SVEMemOperand(x6, z11.VnD(), LSL, 1)),
- "ld1h {z24.d}, p4/z, [x6, z11.d, lsl #1]");
- COMPARE_PREFIX(ld1sh(z22.VnD(),
- p6.Zeroing(),
- SVEMemOperand(x7, z31.VnD(), LSL, 1)),
- "ld1sh {z22.d}, p6/z, [x7, z31.d, lsl #1]");
- COMPARE_PREFIX(ld1sw(z9.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x2, z27.VnD(), LSL, 2)),
- "ld1sw {z9.d}, p0/z, [x2, z27.d, lsl #2]");
- COMPARE_PREFIX(ld1w(z9.VnD(),
- p2.Zeroing(),
- SVEMemOperand(x0, z0.VnD(), LSL, 2)),
- "ld1w {z9.d}, p2/z, [x0, z0.d, lsl #2]");
-
- COMPARE_PREFIX(ld1b(z19.VnD(),
- p5.Zeroing(),
- SVEMemOperand(x21, z29.VnD(), UXTW)),
- "ld1b {z19.d}, p5/z, [x21, z29.d, uxtw]");
- COMPARE_PREFIX(ld1d(z9.VnD(),
- p5.Zeroing(),
- SVEMemOperand(x5, z21.VnD(), SXTW)),
- "ld1d {z9.d}, p5/z, [x5, z21.d, sxtw]");
- COMPARE_PREFIX(ld1h(z26.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, z10.VnD(), UXTW)),
- "ld1h {z26.d}, p3/z, [x1, z10.d, uxtw]");
- COMPARE_PREFIX(ld1sb(z4.VnD(),
- p1.Zeroing(),
- SVEMemOperand(x24, z15.VnD(), SXTW)),
- "ld1sb {z4.d}, p1/z, [x24, z15.d, sxtw]");
- COMPARE_PREFIX(ld1sh(z9.VnD(),
- p1.Zeroing(),
- SVEMemOperand(x0, z12.VnD(), UXTW)),
- "ld1sh {z9.d}, p1/z, [x0, z12.d, uxtw]");
- COMPARE_PREFIX(ld1sw(z19.VnD(),
- p2.Zeroing(),
- SVEMemOperand(x19, z16.VnD(), SXTW)),
- "ld1sw {z19.d}, p2/z, [x19, z16.d, sxtw]");
- COMPARE_PREFIX(ld1w(z13.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x8, z10.VnD(), UXTW)),
- "ld1w {z13.d}, p3/z, [x8, z10.d, uxtw]");
-
- COMPARE_PREFIX(ld1d(z25.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x14, z0.VnD(), UXTW, 3)),
- "ld1d {z25.d}, p3/z, [x14, z0.d, uxtw #3]");
- COMPARE_PREFIX(ld1h(z21.VnD(),
- p5.Zeroing(),
- SVEMemOperand(x13, z8.VnD(), SXTW, 1)),
- "ld1h {z21.d}, p5/z, [x13, z8.d, sxtw #1]");
- COMPARE_PREFIX(ld1sh(z29.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x9, z10.VnD(), UXTW, 1)),
- "ld1sh {z29.d}, p0/z, [x9, z10.d, uxtw #1]");
- COMPARE_PREFIX(ld1sw(z5.VnD(),
- p2.Zeroing(),
- SVEMemOperand(x1, z23.VnD(), SXTW, 2)),
- "ld1sw {z5.d}, p2/z, [x1, z23.d, sxtw #2]");
- COMPARE_PREFIX(ld1w(z21.VnD(),
- p1.Zeroing(),
- SVEMemOperand(x7, z8.VnD(), UXTW, 2)),
- "ld1w {z21.d}, p1/z, [x7, z8.d, uxtw #2]");
+ COMPARE(ld1b(z30.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD())),
+ "ld1b {z30.d}, p6/z, [sp, z24.d]");
+ COMPARE(ld1d(z18.VnD(), p5.Zeroing(), SVEMemOperand(x11, z11.VnD())),
+ "ld1d {z18.d}, p5/z, [x11, z11.d]");
+ COMPARE(ld1h(z2.VnD(), p3.Zeroing(), SVEMemOperand(x16, z18.VnD())),
+ "ld1h {z2.d}, p3/z, [x16, z18.d]");
+ COMPARE(ld1sb(z11.VnD(), p3.Zeroing(), SVEMemOperand(x24, z21.VnD())),
+ "ld1sb {z11.d}, p3/z, [x24, z21.d]");
+ COMPARE(ld1sh(z7.VnD(), p7.Zeroing(), SVEMemOperand(x28, z23.VnD())),
+ "ld1sh {z7.d}, p7/z, [x28, z23.d]");
+ COMPARE(ld1sw(z29.VnD(), p7.Zeroing(), SVEMemOperand(x27, z4.VnD())),
+ "ld1sw {z29.d}, p7/z, [x27, z4.d]");
+ COMPARE(ld1w(z19.VnD(), p1.Zeroing(), SVEMemOperand(x27, z4.VnD())),
+ "ld1w {z19.d}, p1/z, [x27, z4.d]");
+
+ COMPARE(ld1d(z20.VnD(), p3.Zeroing(), SVEMemOperand(x3, z15.VnD(), LSL, 3)),
+ "ld1d {z20.d}, p3/z, [x3, z15.d, lsl #3]");
+ COMPARE(ld1h(z24.VnD(), p4.Zeroing(), SVEMemOperand(x6, z11.VnD(), LSL, 1)),
+ "ld1h {z24.d}, p4/z, [x6, z11.d, lsl #1]");
+ COMPARE(ld1sh(z22.VnD(), p6.Zeroing(), SVEMemOperand(x7, z31.VnD(), LSL, 1)),
+ "ld1sh {z22.d}, p6/z, [x7, z31.d, lsl #1]");
+ COMPARE(ld1sw(z9.VnD(), p0.Zeroing(), SVEMemOperand(x2, z27.VnD(), LSL, 2)),
+ "ld1sw {z9.d}, p0/z, [x2, z27.d, lsl #2]");
+ COMPARE(ld1w(z9.VnD(), p2.Zeroing(), SVEMemOperand(x0, z0.VnD(), LSL, 2)),
+ "ld1w {z9.d}, p2/z, [x0, z0.d, lsl #2]");
+
+ COMPARE(ld1b(z19.VnD(), p5.Zeroing(), SVEMemOperand(x21, z29.VnD(), UXTW)),
+ "ld1b {z19.d}, p5/z, [x21, z29.d, uxtw]");
+ COMPARE(ld1d(z9.VnD(), p5.Zeroing(), SVEMemOperand(x5, z21.VnD(), SXTW)),
+ "ld1d {z9.d}, p5/z, [x5, z21.d, sxtw]");
+ COMPARE(ld1h(z26.VnD(), p3.Zeroing(), SVEMemOperand(x1, z10.VnD(), UXTW)),
+ "ld1h {z26.d}, p3/z, [x1, z10.d, uxtw]");
+ COMPARE(ld1sb(z4.VnD(), p1.Zeroing(), SVEMemOperand(x24, z15.VnD(), SXTW)),
+ "ld1sb {z4.d}, p1/z, [x24, z15.d, sxtw]");
+ COMPARE(ld1sh(z9.VnD(), p1.Zeroing(), SVEMemOperand(x0, z12.VnD(), UXTW)),
+ "ld1sh {z9.d}, p1/z, [x0, z12.d, uxtw]");
+ COMPARE(ld1sw(z19.VnD(), p2.Zeroing(), SVEMemOperand(x19, z16.VnD(), SXTW)),
+ "ld1sw {z19.d}, p2/z, [x19, z16.d, sxtw]");
+ COMPARE(ld1w(z13.VnD(), p3.Zeroing(), SVEMemOperand(x8, z10.VnD(), UXTW)),
+ "ld1w {z13.d}, p3/z, [x8, z10.d, uxtw]");
+
+ COMPARE(ld1d(z25.VnD(), p3.Zeroing(), SVEMemOperand(x14, z0.VnD(), UXTW, 3)),
+ "ld1d {z25.d}, p3/z, [x14, z0.d, uxtw #3]");
+ COMPARE(ld1h(z21.VnD(), p5.Zeroing(), SVEMemOperand(x13, z8.VnD(), SXTW, 1)),
+ "ld1h {z21.d}, p5/z, [x13, z8.d, sxtw #1]");
+ COMPARE(ld1sh(z29.VnD(), p0.Zeroing(), SVEMemOperand(x9, z10.VnD(), UXTW, 1)),
+ "ld1sh {z29.d}, p0/z, [x9, z10.d, uxtw #1]");
+ COMPARE(ld1sw(z5.VnD(), p2.Zeroing(), SVEMemOperand(x1, z23.VnD(), SXTW, 2)),
+ "ld1sw {z5.d}, p2/z, [x1, z23.d, sxtw #2]");
+ COMPARE(ld1w(z21.VnD(), p1.Zeroing(), SVEMemOperand(x7, z8.VnD(), UXTW, 2)),
+ "ld1w {z21.d}, p1/z, [x7, z8.d, uxtw #2]");
CLEANUP();
}
@@ -3785,40 +3758,40 @@ TEST(sve_mem_prefetch) {
VIXL_DISAM_PREFETCH_TEST(prfd, "prfd", 3);
#undef VIXL_DISAM_PREFETCH_TEST
- COMPARE_PREFIX(prfb(PLDL1KEEP, p5, SVEMemOperand(z30.VnS(), 0)),
- "prfb pldl1keep, p5, [z30.s]");
- COMPARE_PREFIX(prfb(PLDL1STRM, p5, SVEMemOperand(x28, -11, SVE_MUL_VL)),
- "prfb pldl1strm, p5, [x28, #-11, mul vl]");
- COMPARE_PREFIX(prfb(PLDL2KEEP, p6, SVEMemOperand(x30, x29)),
- "prfb pldl2keep, p6, [x30, x29]");
- COMPARE_PREFIX(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnD())),
- "prfb pldl2strm, p6, [x7, z12.d]");
- COMPARE_PREFIX(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnS(), UXTW)),
- "prfb pldl2strm, p6, [x7, z12.s, uxtw]");
- COMPARE_PREFIX(prfd(PLDL3KEEP, p5, SVEMemOperand(z11.VnD(), 9)),
- "prfd pldl3keep, p5, [z11.d, #9]");
- COMPARE_PREFIX(prfd(PLDL3STRM, p3, SVEMemOperand(x0, 0, SVE_MUL_VL)),
- "prfd pldl3strm, p3, [x0]");
- COMPARE_PREFIX(prfd(PSTL1KEEP, p7, SVEMemOperand(x5, x5, LSL, 3)),
- "prfd pstl1keep, p7, [x5, x5, lsl #3]");
- COMPARE_PREFIX(prfd(PSTL1STRM, p1, SVEMemOperand(x19, z18.VnS(), SXTW, 3)),
- "prfd pstl1strm, p1, [x19, z18.s, sxtw #3]");
- COMPARE_PREFIX(prfh(PSTL2KEEP, p6, SVEMemOperand(z0.VnS(), 31)),
- "prfh pstl2keep, p6, [z0.s, #31]");
- COMPARE_PREFIX(prfh(PSTL2STRM, p4, SVEMemOperand(x17, -3, SVE_MUL_VL)),
- "prfh pstl2strm, p4, [x17, #-3, mul vl]");
- COMPARE_PREFIX(prfh(PSTL3KEEP, p3, SVEMemOperand(x0, x0, LSL, 1)),
- "prfh pstl3keep, p3, [x0, x0, lsl #1]");
- COMPARE_PREFIX(prfh(PSTL3STRM, p4, SVEMemOperand(x20, z0.VnD(), LSL, 1)),
- "prfh pstl3strm, p4, [x20, z0.d, lsl #1]");
- COMPARE_PREFIX(prfw(PLDL1KEEP, p3, SVEMemOperand(z23.VnD(), 5)),
- "prfw pldl1keep, p3, [z23.d, #5]");
- COMPARE_PREFIX(prfw(PLDL1STRM, p1, SVEMemOperand(x4, 31, SVE_MUL_VL)),
- "prfw pldl1strm, p1, [x4, #31, mul vl]");
- COMPARE_PREFIX(prfw(PLDL2KEEP, p2, SVEMemOperand(x22, x22, LSL, 2)),
- "prfw pldl2keep, p2, [x22, x22, lsl #2]");
- COMPARE_PREFIX(prfw(PLDL2STRM, p1, SVEMemOperand(x2, z6.VnS(), SXTW, 2)),
- "prfw pldl2strm, p1, [x2, z6.s, sxtw #2]");
+ COMPARE(prfb(PLDL1KEEP, p5, SVEMemOperand(z30.VnS(), 0)),
+ "prfb pldl1keep, p5, [z30.s]");
+ COMPARE(prfb(PLDL1STRM, p5, SVEMemOperand(x28, -11, SVE_MUL_VL)),
+ "prfb pldl1strm, p5, [x28, #-11, mul vl]");
+ COMPARE(prfb(PLDL2KEEP, p6, SVEMemOperand(x30, x29)),
+ "prfb pldl2keep, p6, [x30, x29]");
+ COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnD())),
+ "prfb pldl2strm, p6, [x7, z12.d]");
+ COMPARE(prfb(PLDL2STRM, p6, SVEMemOperand(x7, z12.VnS(), UXTW)),
+ "prfb pldl2strm, p6, [x7, z12.s, uxtw]");
+ COMPARE(prfd(PLDL3KEEP, p5, SVEMemOperand(z11.VnD(), 9)),
+ "prfd pldl3keep, p5, [z11.d, #9]");
+ COMPARE(prfd(PLDL3STRM, p3, SVEMemOperand(x0, 0, SVE_MUL_VL)),
+ "prfd pldl3strm, p3, [x0]");
+ COMPARE(prfd(PSTL1KEEP, p7, SVEMemOperand(x5, x5, LSL, 3)),
+ "prfd pstl1keep, p7, [x5, x5, lsl #3]");
+ COMPARE(prfd(PSTL1STRM, p1, SVEMemOperand(x19, z18.VnS(), SXTW, 3)),
+ "prfd pstl1strm, p1, [x19, z18.s, sxtw #3]");
+ COMPARE(prfh(PSTL2KEEP, p6, SVEMemOperand(z0.VnS(), 31)),
+ "prfh pstl2keep, p6, [z0.s, #31]");
+ COMPARE(prfh(PSTL2STRM, p4, SVEMemOperand(x17, -3, SVE_MUL_VL)),
+ "prfh pstl2strm, p4, [x17, #-3, mul vl]");
+ COMPARE(prfh(PSTL3KEEP, p3, SVEMemOperand(x0, x0, LSL, 1)),
+ "prfh pstl3keep, p3, [x0, x0, lsl #1]");
+ COMPARE(prfh(PSTL3STRM, p4, SVEMemOperand(x20, z0.VnD(), LSL, 1)),
+ "prfh pstl3strm, p4, [x20, z0.d, lsl #1]");
+ COMPARE(prfw(PLDL1KEEP, p3, SVEMemOperand(z23.VnD(), 5)),
+ "prfw pldl1keep, p3, [z23.d, #5]");
+ COMPARE(prfw(PLDL1STRM, p1, SVEMemOperand(x4, 31, SVE_MUL_VL)),
+ "prfw pldl1strm, p1, [x4, #31, mul vl]");
+ COMPARE(prfw(PLDL2KEEP, p2, SVEMemOperand(x22, x22, LSL, 2)),
+ "prfw pldl2keep, p2, [x22, x22, lsl #2]");
+ COMPARE(prfw(PLDL2STRM, p1, SVEMemOperand(x2, z6.VnS(), SXTW, 2)),
+ "prfw pldl2strm, p1, [x2, z6.s, sxtw #2]");
CLEANUP();
}
@@ -3827,98 +3800,72 @@ TEST(sve_mem_64bit_ff_gather_scalar_plus_vector) {
SETUP();
// 64-bit unscaled offset.
- COMPARE_PREFIX(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())),
- "ldff1b {z18.d}, p6/z, [x27, z24.d]");
- COMPARE_PREFIX(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())),
- "ldff1h {z28.d}, p6/z, [x1, z30.d]");
- COMPARE_PREFIX(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())),
- "ldff1w {z12.d}, p3/z, [x25, z27.d]");
- COMPARE_PREFIX(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())),
- "ldff1d {z23.d}, p5/z, [x29, z31.d]");
- COMPARE_PREFIX(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())),
- "ldff1sb {z15.d}, p5/z, [x5, z14.d]");
- COMPARE_PREFIX(ldff1sh(z18.VnD(),
- p4.Zeroing(),
- SVEMemOperand(x25, z25.VnD())),
- "ldff1sh {z18.d}, p4/z, [x25, z25.d]");
- COMPARE_PREFIX(ldff1sw(z12.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x25, z27.VnD())),
- "ldff1sw {z12.d}, p3/z, [x25, z27.d]");
+ COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(x27, z24.VnD())),
+ "ldff1b {z18.d}, p6/z, [x27, z24.d]");
+ COMPARE(ldff1h(z28.VnD(), p6.Zeroing(), SVEMemOperand(x1, z30.VnD())),
+ "ldff1h {z28.d}, p6/z, [x1, z30.d]");
+ COMPARE(ldff1w(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())),
+ "ldff1w {z12.d}, p3/z, [x25, z27.d]");
+ COMPARE(ldff1d(z23.VnD(), p5.Zeroing(), SVEMemOperand(x29, z31.VnD())),
+ "ldff1d {z23.d}, p5/z, [x29, z31.d]");
+ COMPARE(ldff1sb(z15.VnD(), p5.Zeroing(), SVEMemOperand(x5, z14.VnD())),
+ "ldff1sb {z15.d}, p5/z, [x5, z14.d]");
+ COMPARE(ldff1sh(z18.VnD(), p4.Zeroing(), SVEMemOperand(x25, z25.VnD())),
+ "ldff1sh {z18.d}, p4/z, [x25, z25.d]");
+ COMPARE(ldff1sw(z12.VnD(), p3.Zeroing(), SVEMemOperand(x25, z27.VnD())),
+ "ldff1sw {z12.d}, p3/z, [x25, z27.d]");
// 64-bit scaled offset.
- COMPARE_PREFIX(ldff1h(z25.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x17, z15.VnD(), LSL, 1)),
- "ldff1h {z25.d}, p3/z, [x17, z15.d, lsl #1]");
- COMPARE_PREFIX(ldff1w(z5.VnD(),
- p4.Zeroing(),
- SVEMemOperand(x23, z31.VnD(), LSL, 2)),
- "ldff1w {z5.d}, p4/z, [x23, z31.d, lsl #2]");
- COMPARE_PREFIX(ldff1d(z2.VnD(),
- p0.Zeroing(),
- SVEMemOperand(sp, z7.VnD(), LSL, 3)),
- "ldff1d {z2.d}, p0/z, [sp, z7.d, lsl #3]");
- COMPARE_PREFIX(ldff1sh(z10.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x19, z15.VnD(), LSL, 1)),
- "ldff1sh {z10.d}, p0/z, [x19, z15.d, lsl #1]");
- COMPARE_PREFIX(ldff1sw(z5.VnD(),
- p4.Zeroing(),
- SVEMemOperand(x23, z31.VnD(), LSL, 2)),
- "ldff1sw {z5.d}, p4/z, [x23, z31.d, lsl #2]");
+ COMPARE(ldff1h(z25.VnD(),
+ p3.Zeroing(),
+ SVEMemOperand(x17, z15.VnD(), LSL, 1)),
+ "ldff1h {z25.d}, p3/z, [x17, z15.d, lsl #1]");
+ COMPARE(ldff1w(z5.VnD(), p4.Zeroing(), SVEMemOperand(x23, z31.VnD(), LSL, 2)),
+ "ldff1w {z5.d}, p4/z, [x23, z31.d, lsl #2]");
+ COMPARE(ldff1d(z2.VnD(), p0.Zeroing(), SVEMemOperand(sp, z7.VnD(), LSL, 3)),
+ "ldff1d {z2.d}, p0/z, [sp, z7.d, lsl #3]");
+ COMPARE(ldff1sh(z10.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, z15.VnD(), LSL, 1)),
+ "ldff1sh {z10.d}, p0/z, [x19, z15.d, lsl #1]");
+ COMPARE(ldff1sw(z5.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x23, z31.VnD(), LSL, 2)),
+ "ldff1sw {z5.d}, p4/z, [x23, z31.d, lsl #2]");
// 32-bit unpacked unscaled offset
- COMPARE_PREFIX(ldff1b(z18.VnD(),
- p6.Zeroing(),
- SVEMemOperand(sp, z24.VnD(), UXTW)),
- "ldff1b {z18.d}, p6/z, [sp, z24.d, uxtw]");
- COMPARE_PREFIX(ldff1h(z20.VnD(),
- p5.Zeroing(),
- SVEMemOperand(x7, z14.VnD(), SXTW)),
- "ldff1h {z20.d}, p5/z, [x7, z14.d, sxtw]");
- COMPARE_PREFIX(ldff1w(z22.VnD(),
- p4.Zeroing(),
- SVEMemOperand(x17, z4.VnD(), UXTW)),
- "ldff1w {z22.d}, p4/z, [x17, z4.d, uxtw]");
- COMPARE_PREFIX(ldff1d(z24.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x3, z24.VnD(), SXTW)),
- "ldff1d {z24.d}, p3/z, [x3, z24.d, sxtw]");
- COMPARE_PREFIX(ldff1sb(z26.VnD(),
- p2.Zeroing(),
- SVEMemOperand(x13, z14.VnD(), UXTW)),
- "ldff1sb {z26.d}, p2/z, [x13, z14.d, uxtw]");
- COMPARE_PREFIX(ldff1sh(z28.VnD(),
- p1.Zeroing(),
- SVEMemOperand(x23, z4.VnD(), SXTW)),
- "ldff1sh {z28.d}, p1/z, [x23, z4.d, sxtw]");
- COMPARE_PREFIX(ldff1sw(z30.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x8, z24.VnD(), UXTW)),
- "ldff1sw {z30.d}, p0/z, [x8, z24.d, uxtw]");
+ COMPARE(ldff1b(z18.VnD(), p6.Zeroing(), SVEMemOperand(sp, z24.VnD(), UXTW)),
+ "ldff1b {z18.d}, p6/z, [sp, z24.d, uxtw]");
+ COMPARE(ldff1h(z20.VnD(), p5.Zeroing(), SVEMemOperand(x7, z14.VnD(), SXTW)),
+ "ldff1h {z20.d}, p5/z, [x7, z14.d, sxtw]");
+ COMPARE(ldff1w(z22.VnD(), p4.Zeroing(), SVEMemOperand(x17, z4.VnD(), UXTW)),
+ "ldff1w {z22.d}, p4/z, [x17, z4.d, uxtw]");
+ COMPARE(ldff1d(z24.VnD(), p3.Zeroing(), SVEMemOperand(x3, z24.VnD(), SXTW)),
+ "ldff1d {z24.d}, p3/z, [x3, z24.d, sxtw]");
+ COMPARE(ldff1sb(z26.VnD(), p2.Zeroing(), SVEMemOperand(x13, z14.VnD(), UXTW)),
+ "ldff1sb {z26.d}, p2/z, [x13, z14.d, uxtw]");
+ COMPARE(ldff1sh(z28.VnD(), p1.Zeroing(), SVEMemOperand(x23, z4.VnD(), SXTW)),
+ "ldff1sh {z28.d}, p1/z, [x23, z4.d, sxtw]");
+ COMPARE(ldff1sw(z30.VnD(), p0.Zeroing(), SVEMemOperand(x8, z24.VnD(), UXTW)),
+ "ldff1sw {z30.d}, p0/z, [x8, z24.d, uxtw]");
// 32-bit unpacked scaled offset
- COMPARE_PREFIX(ldff1h(z4.VnD(),
- p5.Zeroing(),
- SVEMemOperand(x7, z1.VnD(), SXTW, 1)),
- "ldff1h {z4.d}, p5/z, [x7, z1.d, sxtw #1]");
- COMPARE_PREFIX(ldff1w(z5.VnD(),
- p4.Zeroing(),
- SVEMemOperand(x17, z11.VnD(), UXTW, 2)),
- "ldff1w {z5.d}, p4/z, [x17, z11.d, uxtw #2]");
- COMPARE_PREFIX(ldff1d(z6.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x3, z31.VnD(), SXTW, 3)),
- "ldff1d {z6.d}, p3/z, [x3, z31.d, sxtw #3]");
- COMPARE_PREFIX(ldff1sh(z7.VnD(),
- p1.Zeroing(),
- SVEMemOperand(x23, z7.VnD(), UXTW, 1)),
- "ldff1sh {z7.d}, p1/z, [x23, z7.d, uxtw #1]");
- COMPARE_PREFIX(ldff1sw(z8.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x8, z17.VnD(), SXTW, 2)),
- "ldff1sw {z8.d}, p0/z, [x8, z17.d, sxtw #2]");
+ COMPARE(ldff1h(z4.VnD(), p5.Zeroing(), SVEMemOperand(x7, z1.VnD(), SXTW, 1)),
+ "ldff1h {z4.d}, p5/z, [x7, z1.d, sxtw #1]");
+ COMPARE(ldff1w(z5.VnD(),
+ p4.Zeroing(),
+ SVEMemOperand(x17, z11.VnD(), UXTW, 2)),
+ "ldff1w {z5.d}, p4/z, [x17, z11.d, uxtw #2]");
+ COMPARE(ldff1d(z6.VnD(), p3.Zeroing(), SVEMemOperand(x3, z31.VnD(), SXTW, 3)),
+ "ldff1d {z6.d}, p3/z, [x3, z31.d, sxtw #3]");
+ COMPARE(ldff1sh(z7.VnD(),
+ p1.Zeroing(),
+ SVEMemOperand(x23, z7.VnD(), UXTW, 1)),
+ "ldff1sh {z7.d}, p1/z, [x23, z7.d, uxtw #1]");
+ COMPARE(ldff1sw(z8.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x8, z17.VnD(), SXTW, 2)),
+ "ldff1sw {z8.d}, p0/z, [x8, z17.d, sxtw #2]");
CLEANUP();
}
@@ -3926,57 +3873,57 @@ TEST(sve_mem_64bit_ff_gather_scalar_plus_vector) {
TEST(sve_ld2_scalar_plus_immediate) {
SETUP();
- COMPARE_PREFIX(ld2b(z31.VnB(), z0.VnB(), p6.Zeroing(), SVEMemOperand(x19)),
- "ld2b {z31.b, z0.b}, p6/z, [x19]");
- COMPARE_PREFIX(ld2b(z31.VnB(),
- z0.VnB(),
- p6.Zeroing(),
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "ld2b {z31.b, z0.b}, p6/z, [x19, #14, mul vl]");
- COMPARE_PREFIX(ld2b(z15.VnB(),
- z16.VnB(),
- p6.Zeroing(),
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "ld2b {z15.b, z16.b}, p6/z, [x19, #-16, mul vl]");
-
- COMPARE_PREFIX(ld2h(z15.VnH(), z16.VnH(), p6.Zeroing(), SVEMemOperand(x19)),
- "ld2h {z15.h, z16.h}, p6/z, [x19]");
- COMPARE_PREFIX(ld2h(z15.VnH(),
- z16.VnH(),
- p0.Zeroing(),
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "ld2h {z15.h, z16.h}, p0/z, [x19, #14, mul vl]");
- COMPARE_PREFIX(ld2h(z15.VnH(),
- z16.VnH(),
- p0.Zeroing(),
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "ld2h {z15.h, z16.h}, p0/z, [x19, #-16, mul vl]");
-
- COMPARE_PREFIX(ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x19)),
- "ld2w {z0.s, z1.s}, p0/z, [x19]");
- COMPARE_PREFIX(ld2w(z0.VnS(),
- z1.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "ld2w {z0.s, z1.s}, p0/z, [x19, #14, mul vl]");
- COMPARE_PREFIX(ld2w(z0.VnS(),
- z1.VnS(),
- p7.Zeroing(),
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "ld2w {z0.s, z1.s}, p7/z, [x19, #-16, mul vl]");
-
- COMPARE_PREFIX(ld2d(z0.VnD(), z1.VnD(), p7.Zeroing(), SVEMemOperand(x19)),
- "ld2d {z0.d, z1.d}, p7/z, [x19]");
- COMPARE_PREFIX(ld2d(z31.VnD(),
- z0.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "ld2d {z31.d, z0.d}, p7/z, [x19, #14, mul vl]");
- COMPARE_PREFIX(ld2d(z31.VnD(),
- z0.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "ld2d {z31.d, z0.d}, p7/z, [x19, #-16, mul vl]");
+ COMPARE(ld2b(z31.VnB(), z0.VnB(), p6.Zeroing(), SVEMemOperand(x19)),
+ "ld2b {z31.b, z0.b}, p6/z, [x19]");
+ COMPARE(ld2b(z31.VnB(),
+ z0.VnB(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "ld2b {z31.b, z0.b}, p6/z, [x19, #14, mul vl]");
+ COMPARE(ld2b(z15.VnB(),
+ z16.VnB(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "ld2b {z15.b, z16.b}, p6/z, [x19, #-16, mul vl]");
+
+ COMPARE(ld2h(z15.VnH(), z16.VnH(), p6.Zeroing(), SVEMemOperand(x19)),
+ "ld2h {z15.h, z16.h}, p6/z, [x19]");
+ COMPARE(ld2h(z15.VnH(),
+ z16.VnH(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "ld2h {z15.h, z16.h}, p0/z, [x19, #14, mul vl]");
+ COMPARE(ld2h(z15.VnH(),
+ z16.VnH(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "ld2h {z15.h, z16.h}, p0/z, [x19, #-16, mul vl]");
+
+ COMPARE(ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x19)),
+ "ld2w {z0.s, z1.s}, p0/z, [x19]");
+ COMPARE(ld2w(z0.VnS(),
+ z1.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "ld2w {z0.s, z1.s}, p0/z, [x19, #14, mul vl]");
+ COMPARE(ld2w(z0.VnS(),
+ z1.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "ld2w {z0.s, z1.s}, p7/z, [x19, #-16, mul vl]");
+
+ COMPARE(ld2d(z0.VnD(), z1.VnD(), p7.Zeroing(), SVEMemOperand(x19)),
+ "ld2d {z0.d, z1.d}, p7/z, [x19]");
+ COMPARE(ld2d(z31.VnD(),
+ z0.VnD(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "ld2d {z31.d, z0.d}, p7/z, [x19, #14, mul vl]");
+ COMPARE(ld2d(z31.VnD(),
+ z0.VnD(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "ld2d {z31.d, z0.d}, p7/z, [x19, #-16, mul vl]");
CLEANUP();
}
@@ -3984,81 +3931,77 @@ TEST(sve_ld2_scalar_plus_immediate) {
TEST(sve_ld3_scalar_plus_immediate) {
SETUP();
- COMPARE_PREFIX(ld3b(z30.VnB(),
- z31.VnB(),
- z0.VnB(),
- p7.Zeroing(),
- SVEMemOperand(x19)),
- "ld3b {z30.b, z31.b, z0.b}, p7/z, [x19]");
- COMPARE_PREFIX(ld3b(z30.VnB(),
- z31.VnB(),
- z0.VnB(),
- p6.Zeroing(),
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #21, mul vl]");
- COMPARE_PREFIX(ld3b(z30.VnB(),
- z31.VnB(),
- z0.VnB(),
- p6.Zeroing(),
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #-24, mul vl]");
-
- COMPARE_PREFIX(ld3h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- p6.Zeroing(),
- SVEMemOperand(x19)),
- "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19]");
- COMPARE_PREFIX(ld3h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- p6.Zeroing(),
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19, #21, mul vl]");
- COMPARE_PREFIX(ld3h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- p0.Zeroing(),
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "ld3h {z15.h, z16.h, z17.h}, p0/z, [x19, #-24, mul vl]");
-
- COMPARE_PREFIX(ld3w(z15.VnS(),
- z16.VnS(),
- z17.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19)),
- "ld3w {z15.s, z16.s, z17.s}, p0/z, [x19]");
- COMPARE_PREFIX(ld3w(z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #21, mul vl]");
- COMPARE_PREFIX(ld3w(z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #-24, mul vl]");
-
- COMPARE_PREFIX(ld3d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x19)),
- "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19]");
- COMPARE_PREFIX(ld3d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19, #21, mul vl]");
- COMPARE_PREFIX(ld3d(z30.VnD(),
- z31.VnD(),
- z0.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "ld3d {z30.d, z31.d, z0.d}, p7/z, [x19, #-24, mul vl]");
+ COMPARE(ld3b(z30.VnB(),
+ z31.VnB(),
+ z0.VnB(),
+ p7.Zeroing(),
+ SVEMemOperand(x19)),
+ "ld3b {z30.b, z31.b, z0.b}, p7/z, [x19]");
+ COMPARE(ld3b(z30.VnB(),
+ z31.VnB(),
+ z0.VnB(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #21, mul vl]");
+ COMPARE(ld3b(z30.VnB(),
+ z31.VnB(),
+ z0.VnB(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "ld3b {z30.b, z31.b, z0.b}, p6/z, [x19, #-24, mul vl]");
+
+ COMPARE(ld3h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ p6.Zeroing(),
+ SVEMemOperand(x19)),
+ "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19]");
+ COMPARE(ld3h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "ld3h {z15.h, z16.h, z17.h}, p6/z, [x19, #21, mul vl]");
+ COMPARE(ld3h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "ld3h {z15.h, z16.h, z17.h}, p0/z, [x19, #-24, mul vl]");
+
+ COMPARE(ld3w(z15.VnS(),
+ z16.VnS(),
+ z17.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19)),
+ "ld3w {z15.s, z16.s, z17.s}, p0/z, [x19]");
+ COMPARE(ld3w(z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #21, mul vl]");
+ COMPARE(ld3w(z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "ld3w {z0.s, z1.s, z2.s}, p0/z, [x19, #-24, mul vl]");
+
+ COMPARE(ld3d(z0.VnD(), z1.VnD(), z2.VnD(), p7.Zeroing(), SVEMemOperand(x19)),
+ "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19]");
+ COMPARE(ld3d(z0.VnD(),
+ z1.VnD(),
+ z2.VnD(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "ld3d {z0.d, z1.d, z2.d}, p7/z, [x19, #21, mul vl]");
+ COMPARE(ld3d(z30.VnD(),
+ z31.VnD(),
+ z0.VnD(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "ld3d {z30.d, z31.d, z0.d}, p7/z, [x19, #-24, mul vl]");
CLEANUP();
}
@@ -4066,96 +4009,96 @@ TEST(sve_ld3_scalar_plus_immediate) {
TEST(sve_ld4_scalar_plus_immediate) {
SETUP();
- COMPARE_PREFIX(ld4b(z31.VnB(),
- z0.VnB(),
- z1.VnB(),
- z2.VnB(),
- p7.Zeroing(),
- SVEMemOperand(x19)),
- "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19]");
- COMPARE_PREFIX(ld4b(z31.VnB(),
- z0.VnB(),
- z1.VnB(),
- z2.VnB(),
- p7.Zeroing(),
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19, #28, mul vl]");
- COMPARE_PREFIX(ld4b(z31.VnB(),
- z0.VnB(),
- z1.VnB(),
- z2.VnB(),
- p6.Zeroing(),
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x19, #-32, mul vl]");
-
- COMPARE_PREFIX(ld4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p6.Zeroing(),
- SVEMemOperand(x19)),
- "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x19]");
- COMPARE_PREFIX(ld4h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- z18.VnH(),
- p6.Zeroing(),
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, "
- "[x19, #28, mul vl]");
- COMPARE_PREFIX(ld4h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- z18.VnH(),
- p6.Zeroing(),
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, "
- "[x19, #-32, mul vl]");
-
- COMPARE_PREFIX(ld4w(z15.VnS(),
- z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19)),
- "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, [x19]");
- COMPARE_PREFIX(ld4w(z15.VnS(),
- z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, "
- "[x19, #28, mul vl]");
- COMPARE_PREFIX(ld4w(z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- z3.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [x19, #-32, mul vl]");
-
- COMPARE_PREFIX(ld4d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- z3.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x19)),
- "ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [x19]");
- COMPARE_PREFIX(ld4d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- z3.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #28, mul vl]");
- COMPARE_PREFIX(ld4d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- z3.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #-32, mul vl]");
+ COMPARE(ld4b(z31.VnB(),
+ z0.VnB(),
+ z1.VnB(),
+ z2.VnB(),
+ p7.Zeroing(),
+ SVEMemOperand(x19)),
+ "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19]");
+ COMPARE(ld4b(z31.VnB(),
+ z0.VnB(),
+ z1.VnB(),
+ z2.VnB(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "ld4b {z31.b, z0.b, z1.b, z2.b}, p7/z, [x19, #28, mul vl]");
+ COMPARE(ld4b(z31.VnB(),
+ z0.VnB(),
+ z1.VnB(),
+ z2.VnB(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x19, #-32, mul vl]");
+
+ COMPARE(ld4h(z31.VnH(),
+ z0.VnH(),
+ z1.VnH(),
+ z2.VnH(),
+ p6.Zeroing(),
+ SVEMemOperand(x19)),
+ "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x19]");
+ COMPARE(ld4h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ z18.VnH(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, "
+ "[x19, #28, mul vl]");
+ COMPARE(ld4h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ z18.VnH(),
+ p6.Zeroing(),
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "ld4h {z15.h, z16.h, z17.h, z18.h}, p6/z, "
+ "[x19, #-32, mul vl]");
+
+ COMPARE(ld4w(z15.VnS(),
+ z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19)),
+ "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, [x19]");
+ COMPARE(ld4w(z15.VnS(),
+ z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "ld4w {z15.s, z16.s, z17.s, z18.s}, p0/z, "
+ "[x19, #28, mul vl]");
+ COMPARE(ld4w(z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ z3.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [x19, #-32, mul vl]");
+
+ COMPARE(ld4d(z0.VnD(),
+ z1.VnD(),
+ z2.VnD(),
+ z3.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x19)),
+ "ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [x19]");
+ COMPARE(ld4d(z0.VnD(),
+ z1.VnD(),
+ z2.VnD(),
+ z3.VnD(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #28, mul vl]");
+ COMPARE(ld4d(z0.VnD(),
+ z1.VnD(),
+ z2.VnD(),
+ z3.VnD(),
+ p7.Zeroing(),
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "ld4d {z0.d, z1.d, z2.d, z3.d}, p7/z, [x19, #-32, mul vl]");
CLEANUP();
}
@@ -4163,69 +4106,60 @@ TEST(sve_ld4_scalar_plus_immediate) {
TEST(sve_ld2_scalar_plus_scalar) {
SETUP();
- COMPARE_PREFIX(ld2b(z25.VnB(),
- z26.VnB(),
- p1.Zeroing(),
- SVEMemOperand(x20, x19)),
- "ld2b {z25.b, z26.b}, p1/z, [x20, x19]");
- COMPARE_PREFIX(ld2b(z25.VnB(),
- z26.VnB(),
- p1.Zeroing(),
- SVEMemOperand(sp, x19)),
- "ld2b {z25.b, z26.b}, p1/z, [sp, x19]");
- COMPARE_PREFIX(ld2b(z31.VnB(),
- z0.VnB(),
- p1.Zeroing(),
- SVEMemOperand(sp, x19)),
- "ld2b {z31.b, z0.b}, p1/z, [sp, x19]");
-
- COMPARE_PREFIX(ld2h(z31.VnH(),
- z0.VnH(),
- p1.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 1)),
- "ld2h {z31.h, z0.h}, p1/z, [x20, x19, lsl #1]");
- COMPARE_PREFIX(ld2h(z31.VnH(),
- z0.VnH(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 1)),
- "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
- COMPARE_PREFIX(ld2h(z31.VnH(),
- z0.VnH(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 1)),
- "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
-
- COMPARE_PREFIX(ld2w(z16.VnS(),
- z17.VnS(),
- p7.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 2)),
- "ld2w {z16.s, z17.s}, p7/z, [x20, x19, lsl #2]");
- COMPARE_PREFIX(ld2w(z16.VnS(),
- z17.VnS(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 2)),
- "ld2w {z16.s, z17.s}, p7/z, [sp, x19, lsl #2]");
- COMPARE_PREFIX(ld2w(z16.VnS(),
- z17.VnS(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 2)),
- "ld2w {z16.s, z17.s}, p0/z, [sp, x19, lsl #2]");
-
- COMPARE_PREFIX(ld2d(z16.VnD(),
- z17.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 3)),
- "ld2d {z16.d, z17.d}, p0/z, [x20, x19, lsl #3]");
- COMPARE_PREFIX(ld2d(z25.VnD(),
- z26.VnD(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 3)),
- "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]");
- COMPARE_PREFIX(ld2d(z25.VnD(),
- z26.VnD(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 3)),
- "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]");
+ COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(x20, x19)),
+ "ld2b {z25.b, z26.b}, p1/z, [x20, x19]");
+ COMPARE(ld2b(z25.VnB(), z26.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)),
+ "ld2b {z25.b, z26.b}, p1/z, [sp, x19]");
+ COMPARE(ld2b(z31.VnB(), z0.VnB(), p1.Zeroing(), SVEMemOperand(sp, x19)),
+ "ld2b {z31.b, z0.b}, p1/z, [sp, x19]");
+
+ COMPARE(ld2h(z31.VnH(),
+ z0.VnH(),
+ p1.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 1)),
+ "ld2h {z31.h, z0.h}, p1/z, [x20, x19, lsl #1]");
+ COMPARE(ld2h(z31.VnH(),
+ z0.VnH(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
+ COMPARE(ld2h(z31.VnH(),
+ z0.VnH(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "ld2h {z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
+
+ COMPARE(ld2w(z16.VnS(),
+ z17.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 2)),
+ "ld2w {z16.s, z17.s}, p7/z, [x20, x19, lsl #2]");
+ COMPARE(ld2w(z16.VnS(),
+ z17.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "ld2w {z16.s, z17.s}, p7/z, [sp, x19, lsl #2]");
+ COMPARE(ld2w(z16.VnS(),
+ z17.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "ld2w {z16.s, z17.s}, p0/z, [sp, x19, lsl #2]");
+
+ COMPARE(ld2d(z16.VnD(),
+ z17.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 3)),
+ "ld2d {z16.d, z17.d}, p0/z, [x20, x19, lsl #3]");
+ COMPARE(ld2d(z25.VnD(),
+ z26.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]");
+ COMPARE(ld2d(z25.VnD(),
+ z26.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "ld2d {z25.d, z26.d}, p0/z, [sp, x19, lsl #3]");
CLEANUP();
}
@@ -4233,81 +4167,81 @@ TEST(sve_ld2_scalar_plus_scalar) {
TEST(sve_ld3_scalar_plus_scalar) {
SETUP();
- COMPARE_PREFIX(ld3b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- p1.Zeroing(),
- SVEMemOperand(x20, x19)),
- "ld3b {z25.b, z26.b, z27.b}, p1/z, [x20, x19]");
- COMPARE_PREFIX(ld3b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- p1.Zeroing(),
- SVEMemOperand(sp, x19)),
- "ld3b {z25.b, z26.b, z27.b}, p1/z, [sp, x19]");
- COMPARE_PREFIX(ld3b(z30.VnB(),
- z31.VnB(),
- z0.VnB(),
- p1.Zeroing(),
- SVEMemOperand(sp, x19)),
- "ld3b {z30.b, z31.b, z0.b}, p1/z, [sp, x19]");
-
- COMPARE_PREFIX(ld3h(z30.VnH(),
- z31.VnH(),
- z0.VnH(),
- p1.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 1)),
- "ld3h {z30.h, z31.h, z0.h}, p1/z, [x20, x19, lsl #1]");
- COMPARE_PREFIX(ld3h(z30.VnH(),
- z31.VnH(),
- z0.VnH(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 1)),
- "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
- COMPARE_PREFIX(ld3h(z30.VnH(),
- z31.VnH(),
- z0.VnH(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 1)),
- "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
-
- COMPARE_PREFIX(ld3w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p7.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 2)),
- "ld3w {z16.s, z17.s, z18.s}, p7/z, [x20, x19, lsl #2]");
- COMPARE_PREFIX(ld3w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 2)),
- "ld3w {z16.s, z17.s, z18.s}, p7/z, [sp, x19, lsl #2]");
- COMPARE_PREFIX(ld3w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 2)),
- "ld3w {z16.s, z17.s, z18.s}, p0/z, [sp, x19, lsl #2]");
-
- COMPARE_PREFIX(ld3d(z16.VnD(),
- z17.VnD(),
- z18.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 3)),
- "ld3d {z16.d, z17.d, z18.d}, p0/z, [x20, x19, lsl #3]");
- COMPARE_PREFIX(ld3d(z25.VnD(),
- z26.VnD(),
- z27.VnD(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 3)),
- "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]");
- COMPARE_PREFIX(ld3d(z25.VnD(),
- z26.VnD(),
- z27.VnD(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 3)),
- "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]");
+ COMPARE(ld3b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ p1.Zeroing(),
+ SVEMemOperand(x20, x19)),
+ "ld3b {z25.b, z26.b, z27.b}, p1/z, [x20, x19]");
+ COMPARE(ld3b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ p1.Zeroing(),
+ SVEMemOperand(sp, x19)),
+ "ld3b {z25.b, z26.b, z27.b}, p1/z, [sp, x19]");
+ COMPARE(ld3b(z30.VnB(),
+ z31.VnB(),
+ z0.VnB(),
+ p1.Zeroing(),
+ SVEMemOperand(sp, x19)),
+ "ld3b {z30.b, z31.b, z0.b}, p1/z, [sp, x19]");
+
+ COMPARE(ld3h(z30.VnH(),
+ z31.VnH(),
+ z0.VnH(),
+ p1.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 1)),
+ "ld3h {z30.h, z31.h, z0.h}, p1/z, [x20, x19, lsl #1]");
+ COMPARE(ld3h(z30.VnH(),
+ z31.VnH(),
+ z0.VnH(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
+ COMPARE(ld3h(z30.VnH(),
+ z31.VnH(),
+ z0.VnH(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "ld3h {z30.h, z31.h, z0.h}, p7/z, [sp, x19, lsl #1]");
+
+ COMPARE(ld3w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 2)),
+ "ld3w {z16.s, z17.s, z18.s}, p7/z, [x20, x19, lsl #2]");
+ COMPARE(ld3w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "ld3w {z16.s, z17.s, z18.s}, p7/z, [sp, x19, lsl #2]");
+ COMPARE(ld3w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "ld3w {z16.s, z17.s, z18.s}, p0/z, [sp, x19, lsl #2]");
+
+ COMPARE(ld3d(z16.VnD(),
+ z17.VnD(),
+ z18.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 3)),
+ "ld3d {z16.d, z17.d, z18.d}, p0/z, [x20, x19, lsl #3]");
+ COMPARE(ld3d(z25.VnD(),
+ z26.VnD(),
+ z27.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]");
+ COMPARE(ld3d(z25.VnD(),
+ z26.VnD(),
+ z27.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "ld3d {z25.d, z26.d, z27.d}, p0/z, [sp, x19, lsl #3]");
CLEANUP();
}
@@ -4315,98 +4249,98 @@ TEST(sve_ld3_scalar_plus_scalar) {
TEST(sve_ld4_scalar_plus_scalar) {
SETUP();
- COMPARE_PREFIX(ld4b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- z28.VnB(),
- p0.Zeroing(),
- SVEMemOperand(x20, x19)),
- "ld4b {z25.b, z26.b, z27.b, z28.b}, p0/z, [x20, x19]");
- COMPARE_PREFIX(ld4b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- z28.VnB(),
- p1.Zeroing(),
- SVEMemOperand(sp, x19)),
- "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]");
- COMPARE_PREFIX(ld4b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- z28.VnB(),
- p1.Zeroing(),
- SVEMemOperand(sp, x19)),
- "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]");
-
- COMPARE_PREFIX(ld4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p1.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 1)),
- "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [x20, x19, lsl #1]");
- COMPARE_PREFIX(ld4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p1.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 1)),
- "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [sp, x19, lsl #1]");
- COMPARE_PREFIX(ld4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 1)),
- "ld4h {z31.h, z0.h, z1.h, z2.h}, p7/z, [sp, x19, lsl #1]");
-
- COMPARE_PREFIX(ld4w(z31.VnS(),
- z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- p7.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 2)),
- "ld4w {z31.s, z0.s, z1.s, z2.s}, p7/z, [x20, x19, lsl #2]");
- COMPARE_PREFIX(ld4w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- z19.VnS(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 2)),
- "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, "
- "[sp, x19, lsl #2]");
- COMPARE_PREFIX(ld4w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- z19.VnS(),
- p7.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 2)),
- "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, "
- "[sp, x19, lsl #2]");
-
- COMPARE_PREFIX(ld4d(z16.VnD(),
- z17.VnD(),
- z18.VnD(),
- z19.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x20, x19, LSL, 3)),
- "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, "
- "[x20, x19, lsl #3]");
- COMPARE_PREFIX(ld4d(z16.VnD(),
- z17.VnD(),
- z18.VnD(),
- z19.VnD(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 3)),
- "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, "
- "[sp, x19, lsl #3]");
- COMPARE_PREFIX(ld4d(z25.VnD(),
- z26.VnD(),
- z27.VnD(),
- z28.VnD(),
- p0.Zeroing(),
- SVEMemOperand(sp, x19, LSL, 3)),
- "ld4d {z25.d, z26.d, z27.d, z28.d}, p0/z, "
- "[sp, x19, lsl #3]");
+ COMPARE(ld4b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ z28.VnB(),
+ p0.Zeroing(),
+ SVEMemOperand(x20, x19)),
+ "ld4b {z25.b, z26.b, z27.b, z28.b}, p0/z, [x20, x19]");
+ COMPARE(ld4b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ z28.VnB(),
+ p1.Zeroing(),
+ SVEMemOperand(sp, x19)),
+ "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]");
+ COMPARE(ld4b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ z28.VnB(),
+ p1.Zeroing(),
+ SVEMemOperand(sp, x19)),
+ "ld4b {z25.b, z26.b, z27.b, z28.b}, p1/z, [sp, x19]");
+
+ COMPARE(ld4h(z31.VnH(),
+ z0.VnH(),
+ z1.VnH(),
+ z2.VnH(),
+ p1.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 1)),
+ "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [x20, x19, lsl #1]");
+ COMPARE(ld4h(z31.VnH(),
+ z0.VnH(),
+ z1.VnH(),
+ z2.VnH(),
+ p1.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "ld4h {z31.h, z0.h, z1.h, z2.h}, p1/z, [sp, x19, lsl #1]");
+ COMPARE(ld4h(z31.VnH(),
+ z0.VnH(),
+ z1.VnH(),
+ z2.VnH(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "ld4h {z31.h, z0.h, z1.h, z2.h}, p7/z, [sp, x19, lsl #1]");
+
+ COMPARE(ld4w(z31.VnS(),
+ z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 2)),
+ "ld4w {z31.s, z0.s, z1.s, z2.s}, p7/z, [x20, x19, lsl #2]");
+ COMPARE(ld4w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ z19.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, "
+ "[sp, x19, lsl #2]");
+ COMPARE(ld4w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ z19.VnS(),
+ p7.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "ld4w {z16.s, z17.s, z18.s, z19.s}, p7/z, "
+ "[sp, x19, lsl #2]");
+
+ COMPARE(ld4d(z16.VnD(),
+ z17.VnD(),
+ z18.VnD(),
+ z19.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(x20, x19, LSL, 3)),
+ "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, "
+ "[x20, x19, lsl #3]");
+ COMPARE(ld4d(z16.VnD(),
+ z17.VnD(),
+ z18.VnD(),
+ z19.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "ld4d {z16.d, z17.d, z18.d, z19.d}, p0/z, "
+ "[sp, x19, lsl #3]");
+ COMPARE(ld4d(z25.VnD(),
+ z26.VnD(),
+ z27.VnD(),
+ z28.VnD(),
+ p0.Zeroing(),
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "ld4d {z25.d, z26.d, z27.d, z28.d}, p0/z, "
+ "[sp, x19, lsl #3]");
CLEANUP();
}
@@ -4414,47 +4348,39 @@ TEST(sve_ld4_scalar_plus_scalar) {
TEST(sve_ff_contiguous) {
SETUP();
- COMPARE_PREFIX(ldff1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x21)),
- "ldff1b {z24.b}, p1/z, [x21]");
- COMPARE_PREFIX(ldff1b(z22.VnH(), p5.Zeroing(), SVEMemOperand(x5, x28)),
- "ldff1b {z22.h}, p5/z, [x5, x28]");
- COMPARE_PREFIX(ldff1b(z2.VnS(), p5.Zeroing(), SVEMemOperand(sp, x11)),
- "ldff1b {z2.s}, p5/z, [sp, x11]");
- COMPARE_PREFIX(ldff1b(z12.VnD(), p3.Zeroing(), SVEMemOperand(x26, xzr)),
- "ldff1b {z12.d}, p3/z, [x26]");
- COMPARE_PREFIX(ldff1h(z21.VnH(), p3.Zeroing(), SVEMemOperand(x27)),
- "ldff1h {z21.h}, p3/z, [x27]");
- COMPARE_PREFIX(ldff1h(z11.VnS(),
- p6.Zeroing(),
- SVEMemOperand(sp, x15, LSL, 1)),
- "ldff1h {z11.s}, p6/z, [sp, x15, lsl #1]");
- COMPARE_PREFIX(ldff1h(z6.VnD(), p7.Zeroing(), SVEMemOperand(x8, xzr, LSL, 1)),
- "ldff1h {z6.d}, p7/z, [x8]");
- COMPARE_PREFIX(ldff1w(z11.VnS(), p7.Zeroing(), SVEMemOperand(sp)),
- "ldff1w {z11.s}, p7/z, [sp]");
- COMPARE_PREFIX(ldff1w(z6.VnD(), p6.Zeroing(), SVEMemOperand(x5, x0, LSL, 2)),
- "ldff1w {z6.d}, p6/z, [x5, x0, lsl #2]");
- COMPARE_PREFIX(ldff1d(z0.VnD(), p3.Zeroing(), SVEMemOperand(x15, x1, LSL, 3)),
- "ldff1d {z0.d}, p3/z, [x15, x1, lsl #3]");
-
- COMPARE_PREFIX(ldff1sb(z31.VnH(), p4.Zeroing(), SVEMemOperand(x10, x25)),
- "ldff1sb {z31.h}, p4/z, [x10, x25]");
- COMPARE_PREFIX(ldff1sb(z25.VnS(), p7.Zeroing(), SVEMemOperand(sp, x20)),
- "ldff1sb {z25.s}, p7/z, [sp, x20]");
- COMPARE_PREFIX(ldff1sb(z20.VnD(), p3.Zeroing(), SVEMemOperand(x19, xzr)),
- "ldff1sb {z20.d}, p3/z, [x19]");
- COMPARE_PREFIX(ldff1sh(z18.VnS(),
- p3.Zeroing(),
- SVEMemOperand(sp, x0, LSL, 1)),
- "ldff1sh {z18.s}, p3/z, [sp, x0, lsl #1]");
- COMPARE_PREFIX(ldff1sh(z30.VnD(),
- p1.Zeroing(),
- SVEMemOperand(x28, xzr, LSL, 1)),
- "ldff1sh {z30.d}, p1/z, [x28]");
- COMPARE_PREFIX(ldff1sw(z3.VnD(),
- p4.Zeroing(),
- SVEMemOperand(x22, x18, LSL, 2)),
- "ldff1sw {z3.d}, p4/z, [x22, x18, lsl #2]");
+ COMPARE(ldff1b(z24.VnB(), p1.Zeroing(), SVEMemOperand(x21)),
+ "ldff1b {z24.b}, p1/z, [x21]");
+ COMPARE(ldff1b(z22.VnH(), p5.Zeroing(), SVEMemOperand(x5, x28)),
+ "ldff1b {z22.h}, p5/z, [x5, x28]");
+ COMPARE(ldff1b(z2.VnS(), p5.Zeroing(), SVEMemOperand(sp, x11)),
+ "ldff1b {z2.s}, p5/z, [sp, x11]");
+ COMPARE(ldff1b(z12.VnD(), p3.Zeroing(), SVEMemOperand(x26, xzr)),
+ "ldff1b {z12.d}, p3/z, [x26]");
+ COMPARE(ldff1h(z21.VnH(), p3.Zeroing(), SVEMemOperand(x27)),
+ "ldff1h {z21.h}, p3/z, [x27]");
+ COMPARE(ldff1h(z11.VnS(), p6.Zeroing(), SVEMemOperand(sp, x15, LSL, 1)),
+ "ldff1h {z11.s}, p6/z, [sp, x15, lsl #1]");
+ COMPARE(ldff1h(z6.VnD(), p7.Zeroing(), SVEMemOperand(x8, xzr, LSL, 1)),
+ "ldff1h {z6.d}, p7/z, [x8]");
+ COMPARE(ldff1w(z11.VnS(), p7.Zeroing(), SVEMemOperand(sp)),
+ "ldff1w {z11.s}, p7/z, [sp]");
+ COMPARE(ldff1w(z6.VnD(), p6.Zeroing(), SVEMemOperand(x5, x0, LSL, 2)),
+ "ldff1w {z6.d}, p6/z, [x5, x0, lsl #2]");
+ COMPARE(ldff1d(z0.VnD(), p3.Zeroing(), SVEMemOperand(x15, x1, LSL, 3)),
+ "ldff1d {z0.d}, p3/z, [x15, x1, lsl #3]");
+
+ COMPARE(ldff1sb(z31.VnH(), p4.Zeroing(), SVEMemOperand(x10, x25)),
+ "ldff1sb {z31.h}, p4/z, [x10, x25]");
+ COMPARE(ldff1sb(z25.VnS(), p7.Zeroing(), SVEMemOperand(sp, x20)),
+ "ldff1sb {z25.s}, p7/z, [sp, x20]");
+ COMPARE(ldff1sb(z20.VnD(), p3.Zeroing(), SVEMemOperand(x19, xzr)),
+ "ldff1sb {z20.d}, p3/z, [x19]");
+ COMPARE(ldff1sh(z18.VnS(), p3.Zeroing(), SVEMemOperand(sp, x0, LSL, 1)),
+ "ldff1sh {z18.s}, p3/z, [sp, x0, lsl #1]");
+ COMPARE(ldff1sh(z30.VnD(), p1.Zeroing(), SVEMemOperand(x28, xzr, LSL, 1)),
+ "ldff1sh {z30.d}, p1/z, [x28]");
+ COMPARE(ldff1sw(z3.VnD(), p4.Zeroing(), SVEMemOperand(x22, x18, LSL, 2)),
+ "ldff1sw {z3.d}, p4/z, [x22, x18, lsl #2]");
CLEANUP();
}
@@ -4462,26 +4388,37 @@ TEST(sve_ff_contiguous) {
TEST(sve_mem_contiguous_load) {
SETUP();
- COMPARE_PREFIX(ld1rqb(z3.VnB(), p2.Zeroing(), SVEMemOperand(x22, x18)),
- "ld1rqb {z3.b}, p2/z, [x22, x18]");
- COMPARE_PREFIX(ld1rqd(z6.VnD(), p0.Zeroing(), SVEMemOperand(x18, x9, LSL, 3)),
- "ld1rqd {z6.d}, p0/z, [x18, x9, lsl #3]");
- COMPARE_PREFIX(ld1rqh(z1.VnH(), p7.Zeroing(), SVEMemOperand(x9, x6, LSL, 1)),
- "ld1rqh {z1.h}, p7/z, [x9, x6, lsl #1]");
- COMPARE_PREFIX(ld1rqw(z12.VnS(),
- p4.Zeroing(),
- SVEMemOperand(sp, xzr, LSL, 2)),
- "ld1rqw {z12.s}, p4/z, [sp, xzr, lsl #2]");
- COMPARE_PREFIX(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 0)),
- "ld1rqb {z18.b}, p2/z, [x18]");
- COMPARE_PREFIX(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 16)),
- "ld1rqb {z18.b}, p2/z, [x18, #16]");
- COMPARE_PREFIX(ld1rqd(z11.VnD(), p1.Zeroing(), SVEMemOperand(x23, -16)),
- "ld1rqd {z11.d}, p1/z, [x23, #-16]");
- COMPARE_PREFIX(ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)),
- "ld1rqh {z11.h}, p1/z, [x0, #112]");
- COMPARE_PREFIX(ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)),
- "ld1rqw {z22.s}, p3/z, [sp, #-128]");
+ COMPARE(ld1rqb(z3.VnB(), p2.Zeroing(), SVEMemOperand(x22, x18)),
+ "ld1rqb {z3.b}, p2/z, [x22, x18]");
+ COMPARE(ld1rqd(z6.VnD(), p0.Zeroing(), SVEMemOperand(x18, x9, LSL, 3)),
+ "ld1rqd {z6.d}, p0/z, [x18, x9, lsl #3]");
+ COMPARE(ld1rqh(z1.VnH(), p7.Zeroing(), SVEMemOperand(x9, x6, LSL, 1)),
+ "ld1rqh {z1.h}, p7/z, [x9, x6, lsl #1]");
+ COMPARE(ld1rqw(z12.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
+ "ld1rqw {z12.s}, p4/z, [sp, xzr, lsl #2]");
+ COMPARE(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 0)),
+ "ld1rqb {z18.b}, p2/z, [x18]");
+ COMPARE(ld1rqb(z18.VnB(), p2.Zeroing(), SVEMemOperand(x18, 16)),
+ "ld1rqb {z18.b}, p2/z, [x18, #16]");
+ COMPARE(ld1rqd(z11.VnD(), p1.Zeroing(), SVEMemOperand(x23, -16)),
+ "ld1rqd {z11.d}, p1/z, [x23, #-16]");
+ COMPARE(ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)),
+ "ld1rqh {z11.h}, p1/z, [x0, #112]");
+ COMPARE(ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)),
+ "ld1rqw {z22.s}, p3/z, [sp, #-128]");
+
+ COMPARE_MACRO(Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)),
+ "ld1rqb {z0.b}, p0/z, [x0, x1]");
+ COMPARE_MACRO(Ld1rqh(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)),
+ "ld1rqh {z0.h}, p0/z, [x0, x1, lsl #1]");
+ COMPARE_MACRO(Ld1rqw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)),
+ "ld1rqw {z0.s}, p0/z, [x0, x1, lsl #2]");
+ COMPARE_MACRO(Ld1rqd(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
+ "ld1rqd {z0.d}, p0/z, [x0, x1, lsl #3]");
+ COMPARE_MACRO(Ld1rqh(z11.VnH(), p1.Zeroing(), SVEMemOperand(x0, 112)),
+ "ld1rqh {z11.h}, p1/z, [x0, #112]");
+ COMPARE_MACRO(Ld1rqw(z22.VnS(), p3.Zeroing(), SVEMemOperand(sp, -128)),
+ "ld1rqw {z22.s}, p3/z, [sp, #-128]");
COMPARE_MACRO(Ld1rqb(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2222)),
"add x16, x0, #0x8ae (2222)\n"
@@ -4493,43 +4430,45 @@ TEST(sve_mem_contiguous_load) {
"add x16, x0, x1, lsl #1\n"
"ld1rqd {z0.d}, p0/z, [x16]");
- COMPARE_PREFIX(ldnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)),
- "ldnt1b {z21.b}, p5/z, [x1, x23]");
- COMPARE_PREFIX(ldnt1d(z10.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x23, x6, LSL, 3)),
- "ldnt1d {z10.d}, p0/z, [x23, x6, lsl #3]");
- COMPARE_PREFIX(ldnt1h(z30.VnH(),
- p4.Zeroing(),
- SVEMemOperand(x6, x11, LSL, 1)),
- "ldnt1h {z30.h}, p4/z, [x6, x11, lsl #1]");
- COMPARE_PREFIX(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)),
- "ldnt1w {z0.s}, p4/z, [x11, x1, lsl #2]");
- COMPARE_PREFIX(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
- "ldnt1w {z0.s}, p4/z, [sp, xzr, lsl #2]");
-
- COMPARE_PREFIX(ldnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)),
- "ldnt1b {z1.b}, p3/z, [x11]");
- COMPARE_PREFIX(ldnt1b(z2.VnB(),
- p2.Zeroing(),
- SVEMemOperand(x12, -8, SVE_MUL_VL)),
- "ldnt1b {z2.b}, p2/z, [x12, #-8, mul vl]");
- COMPARE_PREFIX(ldnt1d(z2.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x13, -2, SVE_MUL_VL)),
- "ldnt1d {z2.d}, p7/z, [x13, #-2, mul vl]");
- COMPARE_PREFIX(ldnt1h(z26.VnH(),
- p4.Zeroing(),
- SVEMemOperand(x16, 3, SVE_MUL_VL)),
- "ldnt1h {z26.h}, p4/z, [x16, #3, mul vl]");
- COMPARE_PREFIX(ldnt1w(z17.VnS(),
- p4.Zeroing(),
- SVEMemOperand(x15, 7, SVE_MUL_VL)),
- "ldnt1w {z17.s}, p4/z, [x15, #7, mul vl]");
- COMPARE_PREFIX(ldnt1w(z17.VnS(),
- p4.Zeroing(),
- SVEMemOperand(sp, 7, SVE_MUL_VL)),
- "ldnt1w {z17.s}, p4/z, [sp, #7, mul vl]");
+ COMPARE(ldnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)),
+ "ldnt1b {z21.b}, p5/z, [x1, x23]");
+ COMPARE(ldnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)),
+ "ldnt1d {z10.d}, p0/z, [x23, x6, lsl #3]");
+ COMPARE(ldnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)),
+ "ldnt1h {z30.h}, p4/z, [x6, x11, lsl #1]");
+ COMPARE(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)),
+ "ldnt1w {z0.s}, p4/z, [x11, x1, lsl #2]");
+ COMPARE(ldnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
+ "ldnt1w {z0.s}, p4/z, [sp, xzr, lsl #2]");
+
+ COMPARE_MACRO(Ldnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)),
+ "ldnt1b {z0.b}, p0/z, [x0, x1]");
+ COMPARE_MACRO(Ldnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)),
+ "ldnt1h {z0.h}, p0/z, [x0, x1, lsl #1]");
+ COMPARE_MACRO(Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)),
+ "ldnt1w {z0.s}, p0/z, [x0, x1, lsl #2]");
+ COMPARE_MACRO(Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
+ "ldnt1d {z0.d}, p0/z, [x0, x1, lsl #3]");
+
+ COMPARE_MACRO(Ldnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
+ "add x16, x0, x1, lsl #3\n"
+ "ldnt1w {z0.s}, p0/z, [x16]");
+ COMPARE_MACRO(Ldnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 4)),
+ "add x16, x0, x1, lsl #4\n"
+ "ldnt1d {z0.d}, p0/z, [x16]");
+
+ COMPARE(ldnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)),
+ "ldnt1b {z1.b}, p3/z, [x11]");
+ COMPARE(ldnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)),
+ "ldnt1b {z2.b}, p2/z, [x12, #-8, mul vl]");
+ COMPARE(ldnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)),
+ "ldnt1d {z2.d}, p7/z, [x13, #-2, mul vl]");
+ COMPARE(ldnt1h(z26.VnH(), p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)),
+ "ldnt1h {z26.h}, p4/z, [x16, #3, mul vl]");
+ COMPARE(ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
+ "ldnt1w {z17.s}, p4/z, [x15, #7, mul vl]");
+ COMPARE(ldnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)),
+ "ldnt1w {z17.s}, p4/z, [sp, #7, mul vl]");
COMPARE_MACRO(Ldnt1b(z2.VnB(),
p0.Zeroing(),
@@ -4554,68 +4493,38 @@ TEST(sve_mem_contiguous_load) {
"add x16, x13, #0x3 (3)\n"
"ldnt1d {z5.d}, p3/z, [x16]");
- COMPARE_PREFIX(ldnf1b(z1.VnH(),
- p0.Zeroing(),
- SVEMemOperand(x25, -8, SVE_MUL_VL)),
- "ldnf1b {z1.h}, p0/z, [x25, #-8, mul vl]");
- COMPARE_PREFIX(ldnf1b(z0.VnS(),
- p0.Zeroing(),
- SVEMemOperand(x2, 7, SVE_MUL_VL)),
- "ldnf1b {z0.s}, p0/z, [x2, #7, mul vl]");
- COMPARE_PREFIX(ldnf1b(z31.VnD(),
- p6.Zeroing(),
- SVEMemOperand(x0, -7, SVE_MUL_VL)),
- "ldnf1b {z31.d}, p6/z, [x0, #-7, mul vl]");
- COMPARE_PREFIX(ldnf1b(z25.VnB(),
- p1.Zeroing(),
- SVEMemOperand(x5, 6, SVE_MUL_VL)),
- "ldnf1b {z25.b}, p1/z, [x5, #6, mul vl]");
- COMPARE_PREFIX(ldnf1d(z25.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x11, -6, SVE_MUL_VL)),
- "ldnf1d {z25.d}, p0/z, [x11, #-6, mul vl]");
- COMPARE_PREFIX(ldnf1h(z22.VnH(),
- p4.Zeroing(),
- SVEMemOperand(x7, 5, SVE_MUL_VL)),
- "ldnf1h {z22.h}, p4/z, [x7, #5, mul vl]");
- COMPARE_PREFIX(ldnf1h(z7.VnS(),
- p2.Zeroing(),
- SVEMemOperand(x1, -5, SVE_MUL_VL)),
- "ldnf1h {z7.s}, p2/z, [x1, #-5, mul vl]");
- COMPARE_PREFIX(ldnf1h(z5.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x29, 4, SVE_MUL_VL)),
- "ldnf1h {z5.d}, p3/z, [x29, #4, mul vl]");
- COMPARE_PREFIX(ldnf1sb(z12.VnH(),
- p5.Zeroing(),
- SVEMemOperand(x27, -4, SVE_MUL_VL)),
- "ldnf1sb {z12.h}, p5/z, [x27, #-4, mul vl]");
- COMPARE_PREFIX(ldnf1sb(z10.VnS(),
- p2.Zeroing(),
- SVEMemOperand(x13, 3, SVE_MUL_VL)),
- "ldnf1sb {z10.s}, p2/z, [x13, #3, mul vl]");
- COMPARE_PREFIX(ldnf1sb(z25.VnD(),
- p6.Zeroing(),
- SVEMemOperand(x26, -3, SVE_MUL_VL)),
- "ldnf1sb {z25.d}, p6/z, [x26, #-3, mul vl]");
- COMPARE_PREFIX(ldnf1sh(z3.VnS(),
- p5.Zeroing(),
- SVEMemOperand(x1, 2, SVE_MUL_VL)),
- "ldnf1sh {z3.s}, p5/z, [x1, #2, mul vl]");
- COMPARE_PREFIX(ldnf1sh(z8.VnD(),
- p6.Zeroing(),
- SVEMemOperand(x13, -2, SVE_MUL_VL)),
- "ldnf1sh {z8.d}, p6/z, [x13, #-2, mul vl]");
- COMPARE_PREFIX(ldnf1sw(z5.VnD(),
- p6.Zeroing(),
- SVEMemOperand(x2, 1, SVE_MUL_VL)),
- "ldnf1sw {z5.d}, p6/z, [x2, #1, mul vl]");
- COMPARE_PREFIX(ldnf1w(z11.VnS(),
- p3.Zeroing(),
- SVEMemOperand(sp, -1, SVE_MUL_VL)),
- "ldnf1w {z11.s}, p3/z, [sp, #-1, mul vl]");
- COMPARE_PREFIX(ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)),
- "ldnf1w {z10.d}, p6/z, [x12]");
+ COMPARE(ldnf1b(z1.VnH(), p0.Zeroing(), SVEMemOperand(x25, -8, SVE_MUL_VL)),
+ "ldnf1b {z1.h}, p0/z, [x25, #-8, mul vl]");
+ COMPARE(ldnf1b(z0.VnS(), p0.Zeroing(), SVEMemOperand(x2, 7, SVE_MUL_VL)),
+ "ldnf1b {z0.s}, p0/z, [x2, #7, mul vl]");
+ COMPARE(ldnf1b(z31.VnD(), p6.Zeroing(), SVEMemOperand(x0, -7, SVE_MUL_VL)),
+ "ldnf1b {z31.d}, p6/z, [x0, #-7, mul vl]");
+ COMPARE(ldnf1b(z25.VnB(), p1.Zeroing(), SVEMemOperand(x5, 6, SVE_MUL_VL)),
+ "ldnf1b {z25.b}, p1/z, [x5, #6, mul vl]");
+ COMPARE(ldnf1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(x11, -6, SVE_MUL_VL)),
+ "ldnf1d {z25.d}, p0/z, [x11, #-6, mul vl]");
+ COMPARE(ldnf1h(z22.VnH(), p4.Zeroing(), SVEMemOperand(x7, 5, SVE_MUL_VL)),
+ "ldnf1h {z22.h}, p4/z, [x7, #5, mul vl]");
+ COMPARE(ldnf1h(z7.VnS(), p2.Zeroing(), SVEMemOperand(x1, -5, SVE_MUL_VL)),
+ "ldnf1h {z7.s}, p2/z, [x1, #-5, mul vl]");
+ COMPARE(ldnf1h(z5.VnD(), p3.Zeroing(), SVEMemOperand(x29, 4, SVE_MUL_VL)),
+ "ldnf1h {z5.d}, p3/z, [x29, #4, mul vl]");
+ COMPARE(ldnf1sb(z12.VnH(), p5.Zeroing(), SVEMemOperand(x27, -4, SVE_MUL_VL)),
+ "ldnf1sb {z12.h}, p5/z, [x27, #-4, mul vl]");
+ COMPARE(ldnf1sb(z10.VnS(), p2.Zeroing(), SVEMemOperand(x13, 3, SVE_MUL_VL)),
+ "ldnf1sb {z10.s}, p2/z, [x13, #3, mul vl]");
+ COMPARE(ldnf1sb(z25.VnD(), p6.Zeroing(), SVEMemOperand(x26, -3, SVE_MUL_VL)),
+ "ldnf1sb {z25.d}, p6/z, [x26, #-3, mul vl]");
+ COMPARE(ldnf1sh(z3.VnS(), p5.Zeroing(), SVEMemOperand(x1, 2, SVE_MUL_VL)),
+ "ldnf1sh {z3.s}, p5/z, [x1, #2, mul vl]");
+ COMPARE(ldnf1sh(z8.VnD(), p6.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)),
+ "ldnf1sh {z8.d}, p6/z, [x13, #-2, mul vl]");
+ COMPARE(ldnf1sw(z5.VnD(), p6.Zeroing(), SVEMemOperand(x2, 1, SVE_MUL_VL)),
+ "ldnf1sw {z5.d}, p6/z, [x2, #1, mul vl]");
+ COMPARE(ldnf1w(z11.VnS(), p3.Zeroing(), SVEMemOperand(sp, -1, SVE_MUL_VL)),
+ "ldnf1w {z11.s}, p3/z, [sp, #-1, mul vl]");
+ COMPARE(ldnf1w(z10.VnD(), p6.Zeroing(), SVEMemOperand(x12)),
+ "ldnf1w {z10.d}, p6/z, [x12]");
CLEANUP();
}
@@ -4623,43 +4532,29 @@ TEST(sve_mem_contiguous_load) {
TEST(sve_mem_contiguous_store) {
SETUP();
- COMPARE_PREFIX(stnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)),
- "stnt1b {z21.b}, p5, [x1, x23]");
- COMPARE_PREFIX(stnt1d(z10.VnD(),
- p0.Zeroing(),
- SVEMemOperand(x23, x6, LSL, 3)),
- "stnt1d {z10.d}, p0, [x23, x6, lsl #3]");
- COMPARE_PREFIX(stnt1h(z30.VnH(),
- p4.Zeroing(),
- SVEMemOperand(x6, x11, LSL, 1)),
- "stnt1h {z30.h}, p4, [x6, x11, lsl #1]");
- COMPARE_PREFIX(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)),
- "stnt1w {z0.s}, p4, [x11, x1, lsl #2]");
- COMPARE_PREFIX(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
- "stnt1w {z0.s}, p4, [sp, xzr, lsl #2]");
-
- COMPARE_PREFIX(stnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)),
- "stnt1b {z1.b}, p3, [x11]");
- COMPARE_PREFIX(stnt1b(z2.VnB(),
- p2.Zeroing(),
- SVEMemOperand(x12, -8, SVE_MUL_VL)),
- "stnt1b {z2.b}, p2, [x12, #-8, mul vl]");
- COMPARE_PREFIX(stnt1d(z2.VnD(),
- p7.Zeroing(),
- SVEMemOperand(x13, -2, SVE_MUL_VL)),
- "stnt1d {z2.d}, p7, [x13, #-2, mul vl]");
- COMPARE_PREFIX(stnt1h(z26.VnH(),
- p4.Zeroing(),
- SVEMemOperand(x16, 3, SVE_MUL_VL)),
- "stnt1h {z26.h}, p4, [x16, #3, mul vl]");
- COMPARE_PREFIX(stnt1w(z17.VnS(),
- p4.Zeroing(),
- SVEMemOperand(x15, 7, SVE_MUL_VL)),
- "stnt1w {z17.s}, p4, [x15, #7, mul vl]");
- COMPARE_PREFIX(stnt1w(z17.VnS(),
- p4.Zeroing(),
- SVEMemOperand(sp, 7, SVE_MUL_VL)),
- "stnt1w {z17.s}, p4, [sp, #7, mul vl]");
+ COMPARE(stnt1b(z21.VnB(), p5.Zeroing(), SVEMemOperand(x1, x23)),
+ "stnt1b {z21.b}, p5, [x1, x23]");
+ COMPARE(stnt1d(z10.VnD(), p0.Zeroing(), SVEMemOperand(x23, x6, LSL, 3)),
+ "stnt1d {z10.d}, p0, [x23, x6, lsl #3]");
+ COMPARE(stnt1h(z30.VnH(), p4.Zeroing(), SVEMemOperand(x6, x11, LSL, 1)),
+ "stnt1h {z30.h}, p4, [x6, x11, lsl #1]");
+ COMPARE(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(x11, x1, LSL, 2)),
+ "stnt1w {z0.s}, p4, [x11, x1, lsl #2]");
+ COMPARE(stnt1w(z0.VnS(), p4.Zeroing(), SVEMemOperand(sp, xzr, LSL, 2)),
+ "stnt1w {z0.s}, p4, [sp, xzr, lsl #2]");
+
+ COMPARE(stnt1b(z1.VnB(), p3.Zeroing(), SVEMemOperand(x11)),
+ "stnt1b {z1.b}, p3, [x11]");
+ COMPARE(stnt1b(z2.VnB(), p2.Zeroing(), SVEMemOperand(x12, -8, SVE_MUL_VL)),
+ "stnt1b {z2.b}, p2, [x12, #-8, mul vl]");
+ COMPARE(stnt1d(z2.VnD(), p7.Zeroing(), SVEMemOperand(x13, -2, SVE_MUL_VL)),
+ "stnt1d {z2.d}, p7, [x13, #-2, mul vl]");
+ COMPARE(stnt1h(z26.VnH(), p4.Zeroing(), SVEMemOperand(x16, 3, SVE_MUL_VL)),
+ "stnt1h {z26.h}, p4, [x16, #3, mul vl]");
+ COMPARE(stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
+ "stnt1w {z17.s}, p4, [x15, #7, mul vl]");
+ COMPARE(stnt1w(z17.VnS(), p4.Zeroing(), SVEMemOperand(sp, 7, SVE_MUL_VL)),
+ "stnt1w {z17.s}, p4, [sp, #7, mul vl]");
COMPARE_MACRO(Stnt1b(z2.VnB(),
p0.Zeroing(),
@@ -4681,24 +4576,58 @@ TEST(sve_mem_contiguous_store) {
"madd x16, x16, x17, x12\n"
"stnt1w {z4.s}, p2, [x16]");
+ COMPARE_MACRO(Stnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1)),
+ "stnt1b {z0.b}, p0, [x0, x1]");
+ COMPARE_MACRO(Stnt1h(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 1)),
+ "stnt1h {z0.h}, p0, [x0, x1, lsl #1]");
+ COMPARE_MACRO(Stnt1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2)),
+ "stnt1w {z0.s}, p0, [x0, x1, lsl #2]");
+ COMPARE_MACRO(Stnt1d(z0.VnD(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 3)),
+ "stnt1d {z0.d}, p0, [x0, x1, lsl #3]");
+
+ CLEANUP();
+}
+
+TEST(sve_load_broadcast_octo) {
+ SETUP();
+
+ COMPARE_MACRO(Ld1rob(z3.VnB(), p1.Zeroing(), SVEMemOperand(x0, x1)),
+ "ld1rob {z3.b}, p1/z, [x0, x1]");
+ COMPARE_MACRO(Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x31, LSL, 1)),
+ "ld1roh {z6.h}, p4/z, [sp]");
+ COMPARE_MACRO(Ld1roh(z6.VnH(), p4.Zeroing(), SVEMemOperand(sp, x30, LSL, 1)),
+ "ld1roh {z6.h}, p4/z, [sp, x30, lsl #1]");
+ COMPARE_MACRO(Ld1row(z2.VnS(), p6.Zeroing(), SVEMemOperand(x30, x30, LSL, 2)),
+ "ld1row {z2.s}, p6/z, [x30, x30, lsl #2]");
+ COMPARE_MACRO(Ld1rod(z30.VnD(), p7.Zeroing(), SVEMemOperand(x21, x5, LSL, 3)),
+ "ld1rod {z30.d}, p7/z, [x21, x5, lsl #3]");
+ COMPARE_MACRO(Ld1rob(z9.VnB(), p0.Zeroing(), SVEMemOperand(sp, 32)),
+ "ld1rob {z9.b}, p0/z, [sp, #32]");
+ COMPARE_MACRO(Ld1roh(z19.VnH(), p3.Zeroing(), SVEMemOperand(x4)),
+ "ld1roh {z19.h}, p3/z, [x4]");
+ COMPARE_MACRO(Ld1row(z21.VnS(), p3.Zeroing(), SVEMemOperand(x11, 224)),
+ "ld1row {z21.s}, p3/z, [x11, #224]");
+ COMPARE_MACRO(Ld1rod(z0.VnD(), p2.Zeroing(), SVEMemOperand(x16, -256)),
+ "ld1rod {z0.d}, p2/z, [x16, #-256]");
+
CLEANUP();
}
TEST(sve_ldr_str_simple) {
SETUP();
- COMPARE_PREFIX(str(p14, SVEMemOperand(x0)), "str p14, [x0]");
- COMPARE_PREFIX(str(z14, SVEMemOperand(sp)), "str z14, [sp]");
- COMPARE_PREFIX(ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]");
- COMPARE_PREFIX(ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]");
- COMPARE_PREFIX(str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)),
- "str p15, [sp, #-256, mul vl]");
- COMPARE_PREFIX(str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)),
- "str z16, [x13, #255, mul vl]");
- COMPARE_PREFIX(ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)),
- "ldr p5, [sp, #-42, mul vl]");
- COMPARE_PREFIX(ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)),
- "ldr z6, [x28, #42, mul vl]");
+ COMPARE(str(p14, SVEMemOperand(x0)), "str p14, [x0]");
+ COMPARE(str(z14, SVEMemOperand(sp)), "str z14, [sp]");
+ COMPARE(ldr(p4, SVEMemOperand(x0)), "ldr p4, [x0]");
+ COMPARE(ldr(z4, SVEMemOperand(sp)), "ldr z4, [sp]");
+ COMPARE(str(p15, SVEMemOperand(sp, -256, SVE_MUL_VL)),
+ "str p15, [sp, #-256, mul vl]");
+ COMPARE(str(z16, SVEMemOperand(x13, 255, SVE_MUL_VL)),
+ "str z16, [x13, #255, mul vl]");
+ COMPARE(ldr(p5, SVEMemOperand(sp, -42, SVE_MUL_VL)),
+ "ldr p5, [sp, #-42, mul vl]");
+ COMPARE(ldr(z6, SVEMemOperand(x28, 42, SVE_MUL_VL)),
+ "ldr z6, [x28, #42, mul vl]");
COMPARE_MACRO(Str(p14, SVEMemOperand(x0)), "str p14, [x0]");
COMPARE_MACRO(Str(z14, SVEMemOperand(sp)), "str z14, [sp]");
@@ -4755,194 +4684,161 @@ TEST(sve_ldr_str_simple) {
TEST(sve_ld1_st1) {
SETUP();
- COMPARE_PREFIX(st1b(z11.VnB(), p0, SVEMemOperand(x22)),
- "st1b {z11.b}, p0, [x22]");
- COMPARE_PREFIX(st1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
- "st1b {z15.h}, p1, [x15, #7, mul vl]");
- COMPARE_PREFIX(st1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "st1b {z19.s}, p2, [sp, #-8, mul vl]");
- COMPARE_PREFIX(st1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "st1b {z23.d}, p3, [x1]");
- COMPARE_PREFIX(st1b(z2.VnB(), p4, SVEMemOperand(x1, x2)),
- "st1b {z2.b}, p4, [x1, x2]");
- COMPARE_PREFIX(st1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)),
- "st1b {z31.d}, p7, [x9, x9]");
- COMPARE_PREFIX(st1b(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)),
- "st1b {z3.s}, p0, [z14.s, #30]");
- COMPARE_PREFIX(st1b(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 31)),
- "st1b {z14.d}, p4, [z3.d, #31]");
- COMPARE_PREFIX(st1b(z15.VnD(), p5, SVEMemOperand(x0, z5.VnD())),
- "st1b {z15.d}, p5, [x0, z5.d]");
- COMPARE_PREFIX(st1b(z15.VnS(), p5, SVEMemOperand(sp, z2.VnS(), UXTW)),
- "st1b {z15.s}, p5, [sp, z2.s, uxtw]");
- COMPARE_PREFIX(st1b(z15.VnD(), p5, SVEMemOperand(x0, z25.VnD(), SXTW)),
- "st1b {z15.d}, p5, [x0, z25.d, sxtw]");
-
- COMPARE_PREFIX(st1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
- "st1h {z15.h}, p1, [x15, #7, mul vl]");
- COMPARE_PREFIX(st1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "st1h {z19.s}, p2, [sp, #-8, mul vl]");
- COMPARE_PREFIX(st1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "st1h {z23.d}, p3, [x1]");
- COMPARE_PREFIX(st1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)),
- "st1h {z2.h}, p4, [x1, x2, lsl #1]");
- COMPARE_PREFIX(st1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)),
- "st1h {z31.d}, p7, [x9, x9, lsl #1]");
- COMPARE_PREFIX(st1h(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)),
- "st1h {z3.s}, p0, [z14.s, #30]");
- COMPARE_PREFIX(st1h(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 62)),
- "st1h {z14.d}, p4, [z3.d, #62]");
- COMPARE_PREFIX(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD())),
- "st1h {z15.d}, p6, [sp, z6.d]");
- COMPARE_PREFIX(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD(), LSL, 1)),
- "st1h {z15.d}, p6, [sp, z6.d, lsl #1]");
- COMPARE_PREFIX(st1h(z15.VnS(), p3, SVEMemOperand(x25, z3.VnS(), SXTW)),
- "st1h {z15.s}, p3, [x25, z3.s, sxtw]");
- COMPARE_PREFIX(st1h(z15.VnS(), p6, SVEMemOperand(x7, z15.VnS(), SXTW, 1)),
- "st1h {z15.s}, p6, [x7, z15.s, sxtw #1]");
- COMPARE_PREFIX(st1h(z17.VnD(), p3, SVEMemOperand(sp, z26.VnD(), SXTW)),
- "st1h {z17.d}, p3, [sp, z26.d, sxtw]");
- COMPARE_PREFIX(st1h(z15.VnD(), p6, SVEMemOperand(x13, z9.VnD(), UXTW, 1)),
- "st1h {z15.d}, p6, [x13, z9.d, uxtw #1]");
-
- COMPARE_PREFIX(st1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "st1w {z19.s}, p2, [sp, #-8, mul vl]");
- COMPARE_PREFIX(st1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "st1w {z23.d}, p3, [x1]");
- COMPARE_PREFIX(st1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)),
- "st1w {z2.s}, p4, [x1, x2, lsl #2]");
- COMPARE_PREFIX(st1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)),
- "st1w {z31.d}, p7, [x9, x9, lsl #2]");
- COMPARE_PREFIX(st1w(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 32)),
- "st1w {z3.s}, p0, [z14.s, #32]");
- COMPARE_PREFIX(st1w(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 124)),
- "st1w {z14.d}, p4, [z3.d, #124]");
- COMPARE_PREFIX(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD())),
- "st1w {z17.d}, p2, [x30, z5.d]");
- COMPARE_PREFIX(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD(), LSL, 2)),
- "st1w {z17.d}, p2, [x30, z5.d, lsl #2]");
- COMPARE_PREFIX(st1w(z15.VnS(), p7, SVEMemOperand(x26, z4.VnS(), UXTW)),
- "st1w {z15.s}, p7, [x26, z4.s, uxtw]");
- COMPARE_PREFIX(st1w(z15.VnS(), p4, SVEMemOperand(x8, z16.VnS(), UXTW, 2)),
- "st1w {z15.s}, p4, [x8, z16.s, uxtw #2]");
- COMPARE_PREFIX(st1w(z19.VnD(), p7, SVEMemOperand(x1, z27.VnD(), UXTW)),
- "st1w {z19.d}, p7, [x1, z27.d, uxtw]");
- COMPARE_PREFIX(st1w(z15.VnD(), p4, SVEMemOperand(sp, z10.VnD(), SXTW, 2)),
- "st1w {z15.d}, p4, [sp, z10.d, sxtw #2]");
-
- COMPARE_PREFIX(st1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "st1d {z23.d}, p3, [x1]");
- COMPARE_PREFIX(st1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)),
- "st1d {z31.d}, p7, [x9, x9, lsl #3]");
- COMPARE_PREFIX(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 32)),
- "st1d {z14.d}, p4, [z3.d, #32]");
- COMPARE_PREFIX(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 248)),
- "st1d {z14.d}, p4, [z3.d, #248]");
- COMPARE_PREFIX(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD())),
- "st1d {z19.d}, p2, [x29, z22.d]");
- COMPARE_PREFIX(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD(), LSL, 3)),
- "st1d {z19.d}, p2, [x29, z22.d, lsl #3]");
- COMPARE_PREFIX(st1d(z21.VnD(), p1, SVEMemOperand(x2, z28.VnD(), SXTW)),
- "st1d {z21.d}, p1, [x2, z28.d, sxtw]");
- COMPARE_PREFIX(st1d(z15.VnD(), p2, SVEMemOperand(x14, z11.VnD(), UXTW, 3)),
- "st1d {z15.d}, p2, [x14, z11.d, uxtw #3]");
-
- COMPARE_PREFIX(ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)),
- "ld1b {z11.b}, p0/z, [x22]");
- COMPARE_PREFIX(ld1b(z15.VnH(),
- p1.Zeroing(),
- SVEMemOperand(x15, 7, SVE_MUL_VL)),
- "ld1b {z15.h}, p1/z, [x15, #7, mul vl]");
- COMPARE_PREFIX(ld1b(z19.VnS(),
- p2.Zeroing(),
- SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "ld1b {z19.s}, p2/z, [sp, #-8, mul vl]");
- COMPARE_PREFIX(ld1b(z23.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "ld1b {z23.d}, p3/z, [x1]");
- COMPARE_PREFIX(ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)),
- "ld1b {z2.b}, p4/z, [x1, x2]");
- COMPARE_PREFIX(ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)),
- "ld1b {z31.d}, p7/z, [x9, x9]");
-
- COMPARE_PREFIX(ld1h(z15.VnH(),
- p1.Zeroing(),
- SVEMemOperand(x15, 7, SVE_MUL_VL)),
- "ld1h {z15.h}, p1/z, [x15, #7, mul vl]");
- COMPARE_PREFIX(ld1h(z19.VnS(),
- p2.Zeroing(),
- SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]");
- COMPARE_PREFIX(ld1h(z23.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "ld1h {z23.d}, p3/z, [x1]");
- COMPARE_PREFIX(ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)),
- "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]");
- COMPARE_PREFIX(ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)),
- "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]");
-
- COMPARE_PREFIX(ld1w(z19.VnS(),
- p2.Zeroing(),
- SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]");
- COMPARE_PREFIX(ld1w(z23.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "ld1w {z23.d}, p3/z, [x1]");
- COMPARE_PREFIX(ld1w(z2.VnS(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 2)),
- "ld1w {z2.s}, p4/z, [x1, x2, lsl #2]");
- COMPARE_PREFIX(ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)),
- "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]");
-
- COMPARE_PREFIX(ld1d(z23.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "ld1d {z23.d}, p3/z, [x1]");
- COMPARE_PREFIX(ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)),
- "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]");
-
- COMPARE_PREFIX(ld1sb(z15.VnH(),
- p1.Zeroing(),
- SVEMemOperand(x15, 7, SVE_MUL_VL)),
- "ld1sb {z15.h}, p1/z, [x15, #7, mul vl]");
- COMPARE_PREFIX(ld1sb(z19.VnS(),
- p2.Zeroing(),
- SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "ld1sb {z19.s}, p2/z, [sp, #-8, mul vl]");
- COMPARE_PREFIX(ld1d(z23.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "ld1d {z23.d}, p3/z, [x1]");
- COMPARE_PREFIX(ld1sb(z5.VnH(), p1.Zeroing(), SVEMemOperand(x15, x1, LSL, 0)),
- "ld1sb {z5.h}, p1/z, [x15, x1]");
- COMPARE_PREFIX(ld1sb(z9.VnS(), p2.Zeroing(), SVEMemOperand(x29, x3, LSL, 0)),
- "ld1sb {z9.s}, p2/z, [x29, x3]");
- COMPARE_PREFIX(ld1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)),
- "ld1sb {z31.d}, p7/z, [x9, x9]");
-
- COMPARE_PREFIX(ld1sh(z19.VnS(),
- p2.Zeroing(),
- SVEMemOperand(sp, -8, SVE_MUL_VL)),
- "ld1sh {z19.s}, p2/z, [sp, #-8, mul vl]");
- COMPARE_PREFIX(ld1sh(z23.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "ld1sh {z23.d}, p3/z, [x1]");
- COMPARE_PREFIX(ld1sh(z11.VnS(),
- p4.Zeroing(),
- SVEMemOperand(x22, x10, LSL, 1)),
- "ld1sh {z11.s}, p4/z, [x22, x10, lsl #1]");
- COMPARE_PREFIX(ld1sh(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)),
- "ld1sh {z31.d}, p7/z, [x9, x9, lsl #1]");
-
- COMPARE_PREFIX(ld1sw(z23.VnD(),
- p3.Zeroing(),
- SVEMemOperand(x1, 0, SVE_MUL_VL)),
- "ld1sw {z23.d}, p3/z, [x1]");
- COMPARE_PREFIX(ld1sw(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)),
- "ld1sw {z31.d}, p7/z, [x9, x9, lsl #2]");
+ COMPARE(st1b(z11.VnB(), p0, SVEMemOperand(x22)), "st1b {z11.b}, p0, [x22]");
+ COMPARE(st1b(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
+ "st1b {z15.h}, p1, [x15, #7, mul vl]");
+ COMPARE(st1b(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "st1b {z19.s}, p2, [sp, #-8, mul vl]");
+ COMPARE(st1b(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "st1b {z23.d}, p3, [x1]");
+ COMPARE(st1b(z2.VnB(), p4, SVEMemOperand(x1, x2)),
+ "st1b {z2.b}, p4, [x1, x2]");
+ COMPARE(st1b(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 0)),
+ "st1b {z31.d}, p7, [x9, x9]");
+ COMPARE(st1b(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)),
+ "st1b {z3.s}, p0, [z14.s, #30]");
+ COMPARE(st1b(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 31)),
+ "st1b {z14.d}, p4, [z3.d, #31]");
+ COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z5.VnD())),
+ "st1b {z15.d}, p5, [x0, z5.d]");
+ COMPARE(st1b(z15.VnS(), p5, SVEMemOperand(sp, z2.VnS(), UXTW)),
+ "st1b {z15.s}, p5, [sp, z2.s, uxtw]");
+ COMPARE(st1b(z15.VnD(), p5, SVEMemOperand(x0, z25.VnD(), SXTW)),
+ "st1b {z15.d}, p5, [x0, z25.d, sxtw]");
+
+ COMPARE(st1h(z15.VnH(), p1, SVEMemOperand(x15, 7, SVE_MUL_VL)),
+ "st1h {z15.h}, p1, [x15, #7, mul vl]");
+ COMPARE(st1h(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "st1h {z19.s}, p2, [sp, #-8, mul vl]");
+ COMPARE(st1h(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "st1h {z23.d}, p3, [x1]");
+ COMPARE(st1h(z2.VnH(), p4, SVEMemOperand(x1, x2, LSL, 1)),
+ "st1h {z2.h}, p4, [x1, x2, lsl #1]");
+ COMPARE(st1h(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 1)),
+ "st1h {z31.d}, p7, [x9, x9, lsl #1]");
+ COMPARE(st1h(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 30)),
+ "st1h {z3.s}, p0, [z14.s, #30]");
+ COMPARE(st1h(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 62)),
+ "st1h {z14.d}, p4, [z3.d, #62]");
+ COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD())),
+ "st1h {z15.d}, p6, [sp, z6.d]");
+ COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(sp, z6.VnD(), LSL, 1)),
+ "st1h {z15.d}, p6, [sp, z6.d, lsl #1]");
+ COMPARE(st1h(z15.VnS(), p3, SVEMemOperand(x25, z3.VnS(), SXTW)),
+ "st1h {z15.s}, p3, [x25, z3.s, sxtw]");
+ COMPARE(st1h(z15.VnS(), p6, SVEMemOperand(x7, z15.VnS(), SXTW, 1)),
+ "st1h {z15.s}, p6, [x7, z15.s, sxtw #1]");
+ COMPARE(st1h(z17.VnD(), p3, SVEMemOperand(sp, z26.VnD(), SXTW)),
+ "st1h {z17.d}, p3, [sp, z26.d, sxtw]");
+ COMPARE(st1h(z15.VnD(), p6, SVEMemOperand(x13, z9.VnD(), UXTW, 1)),
+ "st1h {z15.d}, p6, [x13, z9.d, uxtw #1]");
+
+ COMPARE(st1w(z19.VnS(), p2, SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "st1w {z19.s}, p2, [sp, #-8, mul vl]");
+ COMPARE(st1w(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "st1w {z23.d}, p3, [x1]");
+ COMPARE(st1w(z2.VnS(), p4, SVEMemOperand(x1, x2, LSL, 2)),
+ "st1w {z2.s}, p4, [x1, x2, lsl #2]");
+ COMPARE(st1w(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 2)),
+ "st1w {z31.d}, p7, [x9, x9, lsl #2]");
+ COMPARE(st1w(z3.VnS(), p0, SVEMemOperand(z14.VnS(), 32)),
+ "st1w {z3.s}, p0, [z14.s, #32]");
+ COMPARE(st1w(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 124)),
+ "st1w {z14.d}, p4, [z3.d, #124]");
+ COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD())),
+ "st1w {z17.d}, p2, [x30, z5.d]");
+ COMPARE(st1w(z17.VnD(), p2, SVEMemOperand(x30, z5.VnD(), LSL, 2)),
+ "st1w {z17.d}, p2, [x30, z5.d, lsl #2]");
+ COMPARE(st1w(z15.VnS(), p7, SVEMemOperand(x26, z4.VnS(), UXTW)),
+ "st1w {z15.s}, p7, [x26, z4.s, uxtw]");
+ COMPARE(st1w(z15.VnS(), p4, SVEMemOperand(x8, z16.VnS(), UXTW, 2)),
+ "st1w {z15.s}, p4, [x8, z16.s, uxtw #2]");
+ COMPARE(st1w(z19.VnD(), p7, SVEMemOperand(x1, z27.VnD(), UXTW)),
+ "st1w {z19.d}, p7, [x1, z27.d, uxtw]");
+ COMPARE(st1w(z15.VnD(), p4, SVEMemOperand(sp, z10.VnD(), SXTW, 2)),
+ "st1w {z15.d}, p4, [sp, z10.d, sxtw #2]");
+
+ COMPARE(st1d(z23.VnD(), p3, SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "st1d {z23.d}, p3, [x1]");
+ COMPARE(st1d(z31.VnD(), p7, SVEMemOperand(x9, x9, LSL, 3)),
+ "st1d {z31.d}, p7, [x9, x9, lsl #3]");
+ COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 32)),
+ "st1d {z14.d}, p4, [z3.d, #32]");
+ COMPARE(st1d(z14.VnD(), p4, SVEMemOperand(z3.VnD(), 248)),
+ "st1d {z14.d}, p4, [z3.d, #248]");
+ COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD())),
+ "st1d {z19.d}, p2, [x29, z22.d]");
+ COMPARE(st1d(z19.VnD(), p2, SVEMemOperand(x29, z22.VnD(), LSL, 3)),
+ "st1d {z19.d}, p2, [x29, z22.d, lsl #3]");
+ COMPARE(st1d(z21.VnD(), p1, SVEMemOperand(x2, z28.VnD(), SXTW)),
+ "st1d {z21.d}, p1, [x2, z28.d, sxtw]");
+ COMPARE(st1d(z15.VnD(), p2, SVEMemOperand(x14, z11.VnD(), UXTW, 3)),
+ "st1d {z15.d}, p2, [x14, z11.d, uxtw #3]");
+
+ COMPARE(ld1b(z11.VnB(), p0.Zeroing(), SVEMemOperand(x22)),
+ "ld1b {z11.b}, p0/z, [x22]");
+ COMPARE(ld1b(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
+ "ld1b {z15.h}, p1/z, [x15, #7, mul vl]");
+ COMPARE(ld1b(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "ld1b {z19.s}, p2/z, [sp, #-8, mul vl]");
+ COMPARE(ld1b(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "ld1b {z23.d}, p3/z, [x1]");
+ COMPARE(ld1b(z2.VnB(), p4.Zeroing(), SVEMemOperand(x1, x2)),
+ "ld1b {z2.b}, p4/z, [x1, x2]");
+ COMPARE(ld1b(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)),
+ "ld1b {z31.d}, p7/z, [x9, x9]");
+
+ COMPARE(ld1h(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
+ "ld1h {z15.h}, p1/z, [x15, #7, mul vl]");
+ COMPARE(ld1h(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "ld1h {z19.s}, p2/z, [sp, #-8, mul vl]");
+ COMPARE(ld1h(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "ld1h {z23.d}, p3/z, [x1]");
+ COMPARE(ld1h(z2.VnH(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 1)),
+ "ld1h {z2.h}, p4/z, [x1, x2, lsl #1]");
+ COMPARE(ld1h(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)),
+ "ld1h {z31.d}, p7/z, [x9, x9, lsl #1]");
+
+ COMPARE(ld1w(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "ld1w {z19.s}, p2/z, [sp, #-8, mul vl]");
+ COMPARE(ld1w(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "ld1w {z23.d}, p3/z, [x1]");
+ COMPARE(ld1w(z2.VnS(), p4.Zeroing(), SVEMemOperand(x1, x2, LSL, 2)),
+ "ld1w {z2.s}, p4/z, [x1, x2, lsl #2]");
+ COMPARE(ld1w(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)),
+ "ld1w {z31.d}, p7/z, [x9, x9, lsl #2]");
+
+ COMPARE(ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "ld1d {z23.d}, p3/z, [x1]");
+ COMPARE(ld1d(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 3)),
+ "ld1d {z31.d}, p7/z, [x9, x9, lsl #3]");
+
+ COMPARE(ld1sb(z15.VnH(), p1.Zeroing(), SVEMemOperand(x15, 7, SVE_MUL_VL)),
+ "ld1sb {z15.h}, p1/z, [x15, #7, mul vl]");
+ COMPARE(ld1sb(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "ld1sb {z19.s}, p2/z, [sp, #-8, mul vl]");
+ COMPARE(ld1d(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "ld1d {z23.d}, p3/z, [x1]");
+ COMPARE(ld1sb(z5.VnH(), p1.Zeroing(), SVEMemOperand(x15, x1, LSL, 0)),
+ "ld1sb {z5.h}, p1/z, [x15, x1]");
+ COMPARE(ld1sb(z9.VnS(), p2.Zeroing(), SVEMemOperand(x29, x3, LSL, 0)),
+ "ld1sb {z9.s}, p2/z, [x29, x3]");
+ COMPARE(ld1sb(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 0)),
+ "ld1sb {z31.d}, p7/z, [x9, x9]");
+
+ COMPARE(ld1sh(z19.VnS(), p2.Zeroing(), SVEMemOperand(sp, -8, SVE_MUL_VL)),
+ "ld1sh {z19.s}, p2/z, [sp, #-8, mul vl]");
+ COMPARE(ld1sh(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "ld1sh {z23.d}, p3/z, [x1]");
+ COMPARE(ld1sh(z11.VnS(), p4.Zeroing(), SVEMemOperand(x22, x10, LSL, 1)),
+ "ld1sh {z11.s}, p4/z, [x22, x10, lsl #1]");
+ COMPARE(ld1sh(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 1)),
+ "ld1sh {z31.d}, p7/z, [x9, x9, lsl #1]");
+
+ COMPARE(ld1sw(z23.VnD(), p3.Zeroing(), SVEMemOperand(x1, 0, SVE_MUL_VL)),
+ "ld1sw {z23.d}, p3/z, [x1]");
+ COMPARE(ld1sw(z31.VnD(), p7.Zeroing(), SVEMemOperand(x9, x9, LSL, 2)),
+ "ld1sw {z31.d}, p7/z, [x9, x9, lsl #2]");
CLEANUP();
}
@@ -5102,57 +4998,33 @@ TEST(sve_ld1_st1_macro) {
TEST(sve_st2_scalar_plus_immediate) {
SETUP();
- COMPARE_PREFIX(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19)),
- "st2b {z31.b, z0.b}, p6, [x19]");
- COMPARE_PREFIX(st2b(z31.VnB(),
- z0.VnB(),
- p6,
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "st2b {z31.b, z0.b}, p6, [x19, #14, mul vl]");
- COMPARE_PREFIX(st2b(z15.VnB(),
- z16.VnB(),
- p6,
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "st2b {z15.b, z16.b}, p6, [x19, #-16, mul vl]");
-
- COMPARE_PREFIX(st2h(z15.VnH(), z16.VnH(), p6, SVEMemOperand(x19)),
- "st2h {z15.h, z16.h}, p6, [x19]");
- COMPARE_PREFIX(st2h(z15.VnH(),
- z16.VnH(),
- p0,
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "st2h {z15.h, z16.h}, p0, [x19, #14, mul vl]");
- COMPARE_PREFIX(st2h(z15.VnH(),
- z16.VnH(),
- p0,
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "st2h {z15.h, z16.h}, p0, [x19, #-16, mul vl]");
-
- COMPARE_PREFIX(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19)),
- "st2w {z0.s, z1.s}, p0, [x19]");
- COMPARE_PREFIX(st2w(z0.VnS(),
- z1.VnS(),
- p0,
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "st2w {z0.s, z1.s}, p0, [x19, #14, mul vl]");
- COMPARE_PREFIX(st2w(z0.VnS(),
- z1.VnS(),
- p7,
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "st2w {z0.s, z1.s}, p7, [x19, #-16, mul vl]");
-
- COMPARE_PREFIX(st2d(z0.VnD(), z1.VnD(), p7, SVEMemOperand(x19)),
- "st2d {z0.d, z1.d}, p7, [x19]");
- COMPARE_PREFIX(st2d(z31.VnD(),
- z0.VnD(),
- p7,
- SVEMemOperand(x19, 14, SVE_MUL_VL)),
- "st2d {z31.d, z0.d}, p7, [x19, #14, mul vl]");
- COMPARE_PREFIX(st2d(z31.VnD(),
- z0.VnD(),
- p7,
- SVEMemOperand(x19, -16, SVE_MUL_VL)),
- "st2d {z31.d, z0.d}, p7, [x19, #-16, mul vl]");
+ COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19)),
+ "st2b {z31.b, z0.b}, p6, [x19]");
+ COMPARE(st2b(z31.VnB(), z0.VnB(), p6, SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "st2b {z31.b, z0.b}, p6, [x19, #14, mul vl]");
+ COMPARE(st2b(z15.VnB(), z16.VnB(), p6, SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "st2b {z15.b, z16.b}, p6, [x19, #-16, mul vl]");
+
+ COMPARE(st2h(z15.VnH(), z16.VnH(), p6, SVEMemOperand(x19)),
+ "st2h {z15.h, z16.h}, p6, [x19]");
+ COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "st2h {z15.h, z16.h}, p0, [x19, #14, mul vl]");
+ COMPARE(st2h(z15.VnH(), z16.VnH(), p0, SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "st2h {z15.h, z16.h}, p0, [x19, #-16, mul vl]");
+
+ COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19)),
+ "st2w {z0.s, z1.s}, p0, [x19]");
+ COMPARE(st2w(z0.VnS(), z1.VnS(), p0, SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "st2w {z0.s, z1.s}, p0, [x19, #14, mul vl]");
+ COMPARE(st2w(z0.VnS(), z1.VnS(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "st2w {z0.s, z1.s}, p7, [x19, #-16, mul vl]");
+
+ COMPARE(st2d(z0.VnD(), z1.VnD(), p7, SVEMemOperand(x19)),
+ "st2d {z0.d, z1.d}, p7, [x19]");
+ COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, 14, SVE_MUL_VL)),
+ "st2d {z31.d, z0.d}, p7, [x19, #14, mul vl]");
+ COMPARE(st2d(z31.VnD(), z0.VnD(), p7, SVEMemOperand(x19, -16, SVE_MUL_VL)),
+ "st2d {z31.d, z0.d}, p7, [x19, #-16, mul vl]");
CLEANUP();
}
@@ -5160,65 +5032,65 @@ TEST(sve_st2_scalar_plus_immediate) {
TEST(sve_st3_scalar_plus_immediate) {
SETUP();
- COMPARE_PREFIX(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p7, SVEMemOperand(x19)),
- "st3b {z30.b, z31.b, z0.b}, p7, [x19]");
- COMPARE_PREFIX(st3b(z30.VnB(),
- z31.VnB(),
- z0.VnB(),
- p6,
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "st3b {z30.b, z31.b, z0.b}, p6, [x19, #21, mul vl]");
- COMPARE_PREFIX(st3b(z30.VnB(),
- z31.VnB(),
- z0.VnB(),
- p6,
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "st3b {z30.b, z31.b, z0.b}, p6, [x19, #-24, mul vl]");
-
- COMPARE_PREFIX(st3h(z15.VnH(), z16.VnH(), z17.VnH(), p6, SVEMemOperand(x19)),
- "st3h {z15.h, z16.h, z17.h}, p6, [x19]");
- COMPARE_PREFIX(st3h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- p6,
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "st3h {z15.h, z16.h, z17.h}, p6, [x19, #21, mul vl]");
- COMPARE_PREFIX(st3h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- p0,
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "st3h {z15.h, z16.h, z17.h}, p0, [x19, #-24, mul vl]");
-
- COMPARE_PREFIX(st3w(z15.VnS(), z16.VnS(), z17.VnS(), p0, SVEMemOperand(x19)),
- "st3w {z15.s, z16.s, z17.s}, p0, [x19]");
- COMPARE_PREFIX(st3w(z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- p0,
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "st3w {z0.s, z1.s, z2.s}, p0, [x19, #21, mul vl]");
- COMPARE_PREFIX(st3w(z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- p0,
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "st3w {z0.s, z1.s, z2.s}, p0, [x19, #-24, mul vl]");
-
- COMPARE_PREFIX(st3d(z0.VnD(), z1.VnD(), z2.VnD(), p7, SVEMemOperand(x19)),
- "st3d {z0.d, z1.d, z2.d}, p7, [x19]");
- COMPARE_PREFIX(st3d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- p7,
- SVEMemOperand(x19, 21, SVE_MUL_VL)),
- "st3d {z0.d, z1.d, z2.d}, p7, [x19, #21, mul vl]");
- COMPARE_PREFIX(st3d(z30.VnD(),
- z31.VnD(),
- z0.VnD(),
- p7,
- SVEMemOperand(x19, -24, SVE_MUL_VL)),
- "st3d {z30.d, z31.d, z0.d}, p7, [x19, #-24, mul vl]");
+ COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p7, SVEMemOperand(x19)),
+ "st3b {z30.b, z31.b, z0.b}, p7, [x19]");
+ COMPARE(st3b(z30.VnB(),
+ z31.VnB(),
+ z0.VnB(),
+ p6,
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "st3b {z30.b, z31.b, z0.b}, p6, [x19, #21, mul vl]");
+ COMPARE(st3b(z30.VnB(),
+ z31.VnB(),
+ z0.VnB(),
+ p6,
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "st3b {z30.b, z31.b, z0.b}, p6, [x19, #-24, mul vl]");
+
+ COMPARE(st3h(z15.VnH(), z16.VnH(), z17.VnH(), p6, SVEMemOperand(x19)),
+ "st3h {z15.h, z16.h, z17.h}, p6, [x19]");
+ COMPARE(st3h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ p6,
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "st3h {z15.h, z16.h, z17.h}, p6, [x19, #21, mul vl]");
+ COMPARE(st3h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ p0,
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "st3h {z15.h, z16.h, z17.h}, p0, [x19, #-24, mul vl]");
+
+ COMPARE(st3w(z15.VnS(), z16.VnS(), z17.VnS(), p0, SVEMemOperand(x19)),
+ "st3w {z15.s, z16.s, z17.s}, p0, [x19]");
+ COMPARE(st3w(z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ p0,
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "st3w {z0.s, z1.s, z2.s}, p0, [x19, #21, mul vl]");
+ COMPARE(st3w(z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ p0,
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "st3w {z0.s, z1.s, z2.s}, p0, [x19, #-24, mul vl]");
+
+ COMPARE(st3d(z0.VnD(), z1.VnD(), z2.VnD(), p7, SVEMemOperand(x19)),
+ "st3d {z0.d, z1.d, z2.d}, p7, [x19]");
+ COMPARE(st3d(z0.VnD(),
+ z1.VnD(),
+ z2.VnD(),
+ p7,
+ SVEMemOperand(x19, 21, SVE_MUL_VL)),
+ "st3d {z0.d, z1.d, z2.d}, p7, [x19, #21, mul vl]");
+ COMPARE(st3d(z30.VnD(),
+ z31.VnD(),
+ z0.VnD(),
+ p7,
+ SVEMemOperand(x19, -24, SVE_MUL_VL)),
+ "st3d {z30.d, z31.d, z0.d}, p7, [x19, #-24, mul vl]");
CLEANUP();
}
@@ -5226,94 +5098,79 @@ TEST(sve_st3_scalar_plus_immediate) {
TEST(sve_st4_scalar_plus_immediate) {
SETUP();
- COMPARE_PREFIX(st4b(z31.VnB(),
- z0.VnB(),
- z1.VnB(),
- z2.VnB(),
- p7,
- SVEMemOperand(x19)),
- "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19]");
- COMPARE_PREFIX(st4b(z31.VnB(),
- z0.VnB(),
- z1.VnB(),
- z2.VnB(),
- p7,
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19, #28, mul vl]");
- COMPARE_PREFIX(st4b(z31.VnB(),
- z0.VnB(),
- z1.VnB(),
- z2.VnB(),
- p6,
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x19, #-32, mul vl]");
-
- COMPARE_PREFIX(st4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p6,
- SVEMemOperand(x19)),
- "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x19]");
- COMPARE_PREFIX(st4h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- z18.VnH(),
- p6,
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "st4h {z15.h, z16.h, z17.h, z18.h}, p6, [x19, #28, mul vl]");
- COMPARE_PREFIX(st4h(z15.VnH(),
- z16.VnH(),
- z17.VnH(),
- z18.VnH(),
- p6,
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "st4h {z15.h, z16.h, z17.h, z18.h}, p6, "
- "[x19, #-32, mul vl]");
-
- COMPARE_PREFIX(st4w(z15.VnS(),
- z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p0,
- SVEMemOperand(x19)),
- "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19]");
- COMPARE_PREFIX(st4w(z15.VnS(),
- z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p0,
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19, #28, mul vl]");
- COMPARE_PREFIX(st4w(z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- z3.VnS(),
- p0,
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x19, #-32, mul vl]");
-
- COMPARE_PREFIX(st4d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- z3.VnD(),
- p0,
- SVEMemOperand(x19)),
- "st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x19]");
- COMPARE_PREFIX(st4d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- z3.VnD(),
- p7,
- SVEMemOperand(x19, 28, SVE_MUL_VL)),
- "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #28, mul vl]");
- COMPARE_PREFIX(st4d(z0.VnD(),
- z1.VnD(),
- z2.VnD(),
- z3.VnD(),
- p7,
- SVEMemOperand(x19, -32, SVE_MUL_VL)),
- "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #-32, mul vl]");
+ COMPARE(st4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p7, SVEMemOperand(x19)),
+ "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19]");
+ COMPARE(st4b(z31.VnB(),
+ z0.VnB(),
+ z1.VnB(),
+ z2.VnB(),
+ p7,
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "st4b {z31.b, z0.b, z1.b, z2.b}, p7, [x19, #28, mul vl]");
+ COMPARE(st4b(z31.VnB(),
+ z0.VnB(),
+ z1.VnB(),
+ z2.VnB(),
+ p6,
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x19, #-32, mul vl]");
+
+ COMPARE(st4h(z31.VnH(), z0.VnH(), z1.VnH(), z2.VnH(), p6, SVEMemOperand(x19)),
+ "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x19]");
+ COMPARE(st4h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ z18.VnH(),
+ p6,
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "st4h {z15.h, z16.h, z17.h, z18.h}, p6, [x19, #28, mul vl]");
+ COMPARE(st4h(z15.VnH(),
+ z16.VnH(),
+ z17.VnH(),
+ z18.VnH(),
+ p6,
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "st4h {z15.h, z16.h, z17.h, z18.h}, p6, "
+ "[x19, #-32, mul vl]");
+
+ COMPARE(st4w(z15.VnS(),
+ z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p0,
+ SVEMemOperand(x19)),
+ "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19]");
+ COMPARE(st4w(z15.VnS(),
+ z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p0,
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "st4w {z15.s, z16.s, z17.s, z18.s}, p0, [x19, #28, mul vl]");
+ COMPARE(st4w(z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ z3.VnS(),
+ p0,
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "st4w {z0.s, z1.s, z2.s, z3.s}, p0, [x19, #-32, mul vl]");
+
+ COMPARE(st4d(z0.VnD(), z1.VnD(), z2.VnD(), z3.VnD(), p0, SVEMemOperand(x19)),
+ "st4d {z0.d, z1.d, z2.d, z3.d}, p0, [x19]");
+ COMPARE(st4d(z0.VnD(),
+ z1.VnD(),
+ z2.VnD(),
+ z3.VnD(),
+ p7,
+ SVEMemOperand(x19, 28, SVE_MUL_VL)),
+ "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #28, mul vl]");
+ COMPARE(st4d(z0.VnD(),
+ z1.VnD(),
+ z2.VnD(),
+ z3.VnD(),
+ p7,
+ SVEMemOperand(x19, -32, SVE_MUL_VL)),
+ "st4d {z0.d, z1.d, z2.d, z3.d}, p7, [x19, #-32, mul vl]");
CLEANUP();
}
@@ -5321,39 +5178,33 @@ TEST(sve_st4_scalar_plus_immediate) {
TEST(sve_st2_scalar_plus_scalar) {
SETUP();
- COMPARE_PREFIX(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(x20, x19)),
- "st2b {z25.b, z26.b}, p1, [x20, x19]");
- COMPARE_PREFIX(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(sp, x19)),
- "st2b {z25.b, z26.b}, p1, [sp, x19]");
- COMPARE_PREFIX(st2b(z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)),
- "st2b {z31.b, z0.b}, p1, [sp, x19]");
-
- COMPARE_PREFIX(st2h(z31.VnH(), z0.VnH(), p1, SVEMemOperand(x20, x19, LSL, 1)),
- "st2h {z31.h, z0.h}, p1, [x20, x19, lsl #1]");
- COMPARE_PREFIX(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)),
- "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]");
- COMPARE_PREFIX(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)),
- "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]");
-
- COMPARE_PREFIX(st2w(z16.VnS(),
- z17.VnS(),
- p7,
- SVEMemOperand(x20, x19, LSL, 2)),
- "st2w {z16.s, z17.s}, p7, [x20, x19, lsl #2]");
- COMPARE_PREFIX(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(sp, x19, LSL, 2)),
- "st2w {z16.s, z17.s}, p7, [sp, x19, lsl #2]");
- COMPARE_PREFIX(st2w(z16.VnS(), z17.VnS(), p0, SVEMemOperand(sp, x19, LSL, 2)),
- "st2w {z16.s, z17.s}, p0, [sp, x19, lsl #2]");
-
- COMPARE_PREFIX(st2d(z16.VnD(),
- z17.VnD(),
- p0,
- SVEMemOperand(x20, x19, LSL, 3)),
- "st2d {z16.d, z17.d}, p0, [x20, x19, lsl #3]");
- COMPARE_PREFIX(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)),
- "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]");
- COMPARE_PREFIX(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)),
- "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]");
+ COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(x20, x19)),
+ "st2b {z25.b, z26.b}, p1, [x20, x19]");
+ COMPARE(st2b(z25.VnB(), z26.VnB(), p1, SVEMemOperand(sp, x19)),
+ "st2b {z25.b, z26.b}, p1, [sp, x19]");
+ COMPARE(st2b(z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)),
+ "st2b {z31.b, z0.b}, p1, [sp, x19]");
+
+ COMPARE(st2h(z31.VnH(), z0.VnH(), p1, SVEMemOperand(x20, x19, LSL, 1)),
+ "st2h {z31.h, z0.h}, p1, [x20, x19, lsl #1]");
+ COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)),
+ "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]");
+ COMPARE(st2h(z31.VnH(), z0.VnH(), p7, SVEMemOperand(sp, x19, LSL, 1)),
+ "st2h {z31.h, z0.h}, p7, [sp, x19, lsl #1]");
+
+ COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(x20, x19, LSL, 2)),
+ "st2w {z16.s, z17.s}, p7, [x20, x19, lsl #2]");
+ COMPARE(st2w(z16.VnS(), z17.VnS(), p7, SVEMemOperand(sp, x19, LSL, 2)),
+ "st2w {z16.s, z17.s}, p7, [sp, x19, lsl #2]");
+ COMPARE(st2w(z16.VnS(), z17.VnS(), p0, SVEMemOperand(sp, x19, LSL, 2)),
+ "st2w {z16.s, z17.s}, p0, [sp, x19, lsl #2]");
+
+ COMPARE(st2d(z16.VnD(), z17.VnD(), p0, SVEMemOperand(x20, x19, LSL, 3)),
+ "st2d {z16.d, z17.d}, p0, [x20, x19, lsl #3]");
+ COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)),
+ "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]");
+ COMPARE(st2d(z25.VnD(), z26.VnD(), p0, SVEMemOperand(sp, x19, LSL, 3)),
+ "st2d {z25.d, z26.d}, p0, [sp, x19, lsl #3]");
CLEANUP();
}
@@ -5361,81 +5212,69 @@ TEST(sve_st2_scalar_plus_scalar) {
TEST(sve_st3_scalar_plus_scalar) {
SETUP();
- COMPARE_PREFIX(st3b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- p1,
- SVEMemOperand(x20, x19)),
- "st3b {z25.b, z26.b, z27.b}, p1, [x20, x19]");
- COMPARE_PREFIX(st3b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- p1,
- SVEMemOperand(sp, x19)),
- "st3b {z25.b, z26.b, z27.b}, p1, [sp, x19]");
- COMPARE_PREFIX(st3b(z30.VnB(),
- z31.VnB(),
- z0.VnB(),
- p1,
- SVEMemOperand(sp, x19)),
- "st3b {z30.b, z31.b, z0.b}, p1, [sp, x19]");
-
- COMPARE_PREFIX(st3h(z30.VnH(),
- z31.VnH(),
- z0.VnH(),
- p1,
- SVEMemOperand(x20, x19, LSL, 1)),
- "st3h {z30.h, z31.h, z0.h}, p1, [x20, x19, lsl #1]");
- COMPARE_PREFIX(st3h(z30.VnH(),
- z31.VnH(),
- z0.VnH(),
- p7,
- SVEMemOperand(sp, x19, LSL, 1)),
- "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]");
- COMPARE_PREFIX(st3h(z30.VnH(),
- z31.VnH(),
- z0.VnH(),
- p7,
- SVEMemOperand(sp, x19, LSL, 1)),
- "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]");
-
- COMPARE_PREFIX(st3w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p7,
- SVEMemOperand(x20, x19, LSL, 2)),
- "st3w {z16.s, z17.s, z18.s}, p7, [x20, x19, lsl #2]");
- COMPARE_PREFIX(st3w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p7,
- SVEMemOperand(sp, x19, LSL, 2)),
- "st3w {z16.s, z17.s, z18.s}, p7, [sp, x19, lsl #2]");
- COMPARE_PREFIX(st3w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- p0,
- SVEMemOperand(sp, x19, LSL, 2)),
- "st3w {z16.s, z17.s, z18.s}, p0, [sp, x19, lsl #2]");
-
- COMPARE_PREFIX(st3d(z16.VnD(),
- z17.VnD(),
- z18.VnD(),
- p0,
- SVEMemOperand(x20, x19, LSL, 3)),
- "st3d {z16.d, z17.d, z18.d}, p0, [x20, x19, lsl #3]");
- COMPARE_PREFIX(st3d(z25.VnD(),
- z26.VnD(),
- z27.VnD(),
- p0,
- SVEMemOperand(sp, x19, LSL, 3)),
- "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]");
- COMPARE_PREFIX(st3d(z25.VnD(),
- z26.VnD(),
- z27.VnD(),
- p0,
- SVEMemOperand(sp, x19, LSL, 3)),
- "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]");
+ COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(x20, x19)),
+ "st3b {z25.b, z26.b, z27.b}, p1, [x20, x19]");
+ COMPARE(st3b(z25.VnB(), z26.VnB(), z27.VnB(), p1, SVEMemOperand(sp, x19)),
+ "st3b {z25.b, z26.b, z27.b}, p1, [sp, x19]");
+ COMPARE(st3b(z30.VnB(), z31.VnB(), z0.VnB(), p1, SVEMemOperand(sp, x19)),
+ "st3b {z30.b, z31.b, z0.b}, p1, [sp, x19]");
+
+ COMPARE(st3h(z30.VnH(),
+ z31.VnH(),
+ z0.VnH(),
+ p1,
+ SVEMemOperand(x20, x19, LSL, 1)),
+ "st3h {z30.h, z31.h, z0.h}, p1, [x20, x19, lsl #1]");
+ COMPARE(st3h(z30.VnH(),
+ z31.VnH(),
+ z0.VnH(),
+ p7,
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]");
+ COMPARE(st3h(z30.VnH(),
+ z31.VnH(),
+ z0.VnH(),
+ p7,
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "st3h {z30.h, z31.h, z0.h}, p7, [sp, x19, lsl #1]");
+
+ COMPARE(st3w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p7,
+ SVEMemOperand(x20, x19, LSL, 2)),
+ "st3w {z16.s, z17.s, z18.s}, p7, [x20, x19, lsl #2]");
+ COMPARE(st3w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p7,
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "st3w {z16.s, z17.s, z18.s}, p7, [sp, x19, lsl #2]");
+ COMPARE(st3w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ p0,
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "st3w {z16.s, z17.s, z18.s}, p0, [sp, x19, lsl #2]");
+
+ COMPARE(st3d(z16.VnD(),
+ z17.VnD(),
+ z18.VnD(),
+ p0,
+ SVEMemOperand(x20, x19, LSL, 3)),
+ "st3d {z16.d, z17.d, z18.d}, p0, [x20, x19, lsl #3]");
+ COMPARE(st3d(z25.VnD(),
+ z26.VnD(),
+ z27.VnD(),
+ p0,
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]");
+ COMPARE(st3d(z25.VnD(),
+ z26.VnD(),
+ z27.VnD(),
+ p0,
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "st3d {z25.d, z26.d, z27.d}, p0, [sp, x19, lsl #3]");
CLEANUP();
}
@@ -5443,93 +5282,93 @@ TEST(sve_st3_scalar_plus_scalar) {
TEST(sve_st4_scalar_plus_scalar) {
SETUP();
- COMPARE_PREFIX(st4b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- z28.VnB(),
- p0,
- SVEMemOperand(x20, x19)),
- "st4b {z25.b, z26.b, z27.b, z28.b}, p0, [x20, x19]");
- COMPARE_PREFIX(st4b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- z28.VnB(),
- p1,
- SVEMemOperand(sp, x19)),
- "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]");
- COMPARE_PREFIX(st4b(z25.VnB(),
- z26.VnB(),
- z27.VnB(),
- z28.VnB(),
- p1,
- SVEMemOperand(sp, x19)),
- "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]");
-
- COMPARE_PREFIX(st4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p1,
- SVEMemOperand(x20, x19, LSL, 1)),
- "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [x20, x19, lsl #1]");
- COMPARE_PREFIX(st4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p1,
- SVEMemOperand(sp, x19, LSL, 1)),
- "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [sp, x19, lsl #1]");
- COMPARE_PREFIX(st4h(z31.VnH(),
- z0.VnH(),
- z1.VnH(),
- z2.VnH(),
- p7,
- SVEMemOperand(sp, x19, LSL, 1)),
- "st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x19, lsl #1]");
-
- COMPARE_PREFIX(st4w(z31.VnS(),
- z0.VnS(),
- z1.VnS(),
- z2.VnS(),
- p7,
- SVEMemOperand(x20, x19, LSL, 2)),
- "st4w {z31.s, z0.s, z1.s, z2.s}, p7, [x20, x19, lsl #2]");
- COMPARE_PREFIX(st4w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- z19.VnS(),
- p7,
- SVEMemOperand(sp, x19, LSL, 2)),
- "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]");
- COMPARE_PREFIX(st4w(z16.VnS(),
- z17.VnS(),
- z18.VnS(),
- z19.VnS(),
- p7,
- SVEMemOperand(sp, x19, LSL, 2)),
- "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]");
-
- COMPARE_PREFIX(st4d(z16.VnD(),
- z17.VnD(),
- z18.VnD(),
- z19.VnD(),
- p0,
- SVEMemOperand(x20, x19, LSL, 3)),
- "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [x20, x19, lsl #3]");
- COMPARE_PREFIX(st4d(z16.VnD(),
- z17.VnD(),
- z18.VnD(),
- z19.VnD(),
- p0,
- SVEMemOperand(sp, x19, LSL, 3)),
- "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [sp, x19, lsl #3]");
- COMPARE_PREFIX(st4d(z25.VnD(),
- z26.VnD(),
- z27.VnD(),
- z28.VnD(),
- p0,
- SVEMemOperand(sp, x19, LSL, 3)),
- "st4d {z25.d, z26.d, z27.d, z28.d}, p0, [sp, x19, lsl #3]");
+ COMPARE(st4b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ z28.VnB(),
+ p0,
+ SVEMemOperand(x20, x19)),
+ "st4b {z25.b, z26.b, z27.b, z28.b}, p0, [x20, x19]");
+ COMPARE(st4b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ z28.VnB(),
+ p1,
+ SVEMemOperand(sp, x19)),
+ "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]");
+ COMPARE(st4b(z25.VnB(),
+ z26.VnB(),
+ z27.VnB(),
+ z28.VnB(),
+ p1,
+ SVEMemOperand(sp, x19)),
+ "st4b {z25.b, z26.b, z27.b, z28.b}, p1, [sp, x19]");
+
+ COMPARE(st4h(z31.VnH(),
+ z0.VnH(),
+ z1.VnH(),
+ z2.VnH(),
+ p1,
+ SVEMemOperand(x20, x19, LSL, 1)),
+ "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [x20, x19, lsl #1]");
+ COMPARE(st4h(z31.VnH(),
+ z0.VnH(),
+ z1.VnH(),
+ z2.VnH(),
+ p1,
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "st4h {z31.h, z0.h, z1.h, z2.h}, p1, [sp, x19, lsl #1]");
+ COMPARE(st4h(z31.VnH(),
+ z0.VnH(),
+ z1.VnH(),
+ z2.VnH(),
+ p7,
+ SVEMemOperand(sp, x19, LSL, 1)),
+ "st4h {z31.h, z0.h, z1.h, z2.h}, p7, [sp, x19, lsl #1]");
+
+ COMPARE(st4w(z31.VnS(),
+ z0.VnS(),
+ z1.VnS(),
+ z2.VnS(),
+ p7,
+ SVEMemOperand(x20, x19, LSL, 2)),
+ "st4w {z31.s, z0.s, z1.s, z2.s}, p7, [x20, x19, lsl #2]");
+ COMPARE(st4w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ z19.VnS(),
+ p7,
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]");
+ COMPARE(st4w(z16.VnS(),
+ z17.VnS(),
+ z18.VnS(),
+ z19.VnS(),
+ p7,
+ SVEMemOperand(sp, x19, LSL, 2)),
+ "st4w {z16.s, z17.s, z18.s, z19.s}, p7, [sp, x19, lsl #2]");
+
+ COMPARE(st4d(z16.VnD(),
+ z17.VnD(),
+ z18.VnD(),
+ z19.VnD(),
+ p0,
+ SVEMemOperand(x20, x19, LSL, 3)),
+ "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [x20, x19, lsl #3]");
+ COMPARE(st4d(z16.VnD(),
+ z17.VnD(),
+ z18.VnD(),
+ z19.VnD(),
+ p0,
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "st4d {z16.d, z17.d, z18.d, z19.d}, p0, [sp, x19, lsl #3]");
+ COMPARE(st4d(z25.VnD(),
+ z26.VnD(),
+ z27.VnD(),
+ z28.VnD(),
+ p0,
+ SVEMemOperand(sp, x19, LSL, 3)),
+ "st4d {z25.d, z26.d, z27.d, z28.d}, p0, [sp, x19, lsl #3]");
CLEANUP();
}
@@ -5537,14 +5376,13 @@ TEST(sve_st4_scalar_plus_scalar) {
TEST(sve_mul_index) {
SETUP();
- COMPARE_PREFIX(sdot(z17.VnD(), z21.VnH(), z15.VnH(), 0),
- "sdot z17.d, z21.h, z15.h[0]");
- COMPARE_PREFIX(sdot(z28.VnS(), z9.VnB(), z7.VnB(), 1),
- "sdot z28.s, z9.b, z7.b[1]");
- COMPARE_PREFIX(udot(z26.VnD(), z15.VnH(), z1.VnH(), 1),
- "udot z26.d, z15.h, z1.h[1]");
- COMPARE_PREFIX(udot(z23.VnS(), z24.VnB(), z5.VnB(), 3),
- "udot z23.s, z24.b, z5.b[3]");
+ COMPARE(sdot(z17.VnD(), z21.VnH(), z15.VnH(), 0),
+ "sdot z17.d, z21.h, z15.h[0]");
+ COMPARE(sdot(z28.VnS(), z9.VnB(), z7.VnB(), 1), "sdot z28.s, z9.b, z7.b[1]");
+ COMPARE(udot(z26.VnD(), z15.VnH(), z1.VnH(), 1),
+ "udot z26.d, z15.h, z1.h[1]");
+ COMPARE(udot(z23.VnS(), z24.VnB(), z5.VnB(), 3),
+ "udot z23.s, z24.b, z5.b[3]");
CLEANUP();
}
@@ -5589,22 +5427,16 @@ TEST(sve_mul_index_macro) {
TEST(sve_partition_break) {
SETUP();
- COMPARE_PREFIX(brkas(p8.VnB(), p5.Zeroing(), p4.VnB()),
- "brkas p8.b, p5/z, p4.b");
- COMPARE_PREFIX(brka(p11.VnB(), p7.Zeroing(), p15.VnB()),
- "brka p11.b, p7/z, p15.b");
- COMPARE_PREFIX(brka(p12.VnB(), p8.Merging(), p13.VnB()),
- "brka p12.b, p8/m, p13.b");
- COMPARE_PREFIX(brkbs(p6.VnB(), p9.Zeroing(), p14.VnB()),
- "brkbs p6.b, p9/z, p14.b");
- COMPARE_PREFIX(brkb(p11.VnB(), p6.Zeroing(), p4.VnB()),
- "brkb p11.b, p6/z, p4.b");
- COMPARE_PREFIX(brkb(p12.VnB(), p7.Merging(), p5.VnB()),
- "brkb p12.b, p7/m, p5.b");
- COMPARE_PREFIX(brkns(p2.VnB(), p11.Zeroing(), p0.VnB(), p2.VnB()),
- "brkns p2.b, p11/z, p0.b, p2.b");
- COMPARE_PREFIX(brkn(p4.VnB(), p3.Zeroing(), p1.VnB(), p4.VnB()),
- "brkn p4.b, p3/z, p1.b, p4.b");
+ COMPARE(brkas(p8.VnB(), p5.Zeroing(), p4.VnB()), "brkas p8.b, p5/z, p4.b");
+ COMPARE(brka(p11.VnB(), p7.Zeroing(), p15.VnB()), "brka p11.b, p7/z, p15.b");
+ COMPARE(brka(p12.VnB(), p8.Merging(), p13.VnB()), "brka p12.b, p8/m, p13.b");
+ COMPARE(brkbs(p6.VnB(), p9.Zeroing(), p14.VnB()), "brkbs p6.b, p9/z, p14.b");
+ COMPARE(brkb(p11.VnB(), p6.Zeroing(), p4.VnB()), "brkb p11.b, p6/z, p4.b");
+ COMPARE(brkb(p12.VnB(), p7.Merging(), p5.VnB()), "brkb p12.b, p7/m, p5.b");
+ COMPARE(brkns(p2.VnB(), p11.Zeroing(), p0.VnB(), p2.VnB()),
+ "brkns p2.b, p11/z, p0.b, p2.b");
+ COMPARE(brkn(p4.VnB(), p3.Zeroing(), p1.VnB(), p4.VnB()),
+ "brkn p4.b, p3/z, p1.b, p4.b");
COMPARE_MACRO(Brkns(p3.VnB(), p10.Zeroing(), p2.VnB(), p5.VnB()),
"mov p3.b, p5.b\n"
@@ -5619,48 +5451,36 @@ TEST(sve_partition_break) {
TEST(sve_permute_predicate) {
SETUP();
- COMPARE_PREFIX(rev(p15.VnB(), p6.VnB()), "rev p15.b, p6.b");
- COMPARE_PREFIX(rev(p15.VnH(), p6.VnH()), "rev p15.h, p6.h");
- COMPARE_PREFIX(rev(p15.VnS(), p6.VnS()), "rev p15.s, p6.s");
- COMPARE_PREFIX(rev(p15.VnD(), p6.VnD()), "rev p15.d, p6.d");
- COMPARE_PREFIX(trn1(p13.VnB(), p15.VnB(), p12.VnB()),
- "trn1 p13.b, p15.b, p12.b");
- COMPARE_PREFIX(trn1(p13.VnH(), p15.VnH(), p12.VnH()),
- "trn1 p13.h, p15.h, p12.h");
- COMPARE_PREFIX(trn1(p13.VnS(), p15.VnS(), p12.VnS()),
- "trn1 p13.s, p15.s, p12.s");
- COMPARE_PREFIX(trn1(p13.VnD(), p15.VnD(), p12.VnD()),
- "trn1 p13.d, p15.d, p12.d");
- COMPARE_PREFIX(trn2(p5.VnB(), p5.VnB(), p6.VnB()), "trn2 p5.b, p5.b, p6.b");
- COMPARE_PREFIX(trn2(p5.VnH(), p5.VnH(), p6.VnH()), "trn2 p5.h, p5.h, p6.h");
- COMPARE_PREFIX(trn2(p5.VnS(), p5.VnS(), p6.VnS()), "trn2 p5.s, p5.s, p6.s");
- COMPARE_PREFIX(trn2(p5.VnD(), p5.VnD(), p6.VnD()), "trn2 p5.d, p5.d, p6.d");
- COMPARE_PREFIX(uzp1(p14.VnB(), p4.VnB(), p14.VnB()),
- "uzp1 p14.b, p4.b, p14.b");
- COMPARE_PREFIX(uzp1(p14.VnH(), p4.VnH(), p14.VnH()),
- "uzp1 p14.h, p4.h, p14.h");
- COMPARE_PREFIX(uzp1(p14.VnS(), p4.VnS(), p14.VnS()),
- "uzp1 p14.s, p4.s, p14.s");
- COMPARE_PREFIX(uzp1(p14.VnD(), p4.VnD(), p14.VnD()),
- "uzp1 p14.d, p4.d, p14.d");
- COMPARE_PREFIX(uzp2(p6.VnB(), p11.VnB(), p2.VnB()), "uzp2 p6.b, p11.b, p2.b");
- COMPARE_PREFIX(uzp2(p6.VnH(), p11.VnH(), p2.VnH()), "uzp2 p6.h, p11.h, p2.h");
- COMPARE_PREFIX(uzp2(p6.VnS(), p11.VnS(), p2.VnS()), "uzp2 p6.s, p11.s, p2.s");
- COMPARE_PREFIX(uzp2(p6.VnD(), p11.VnD(), p2.VnD()), "uzp2 p6.d, p11.d, p2.d");
- COMPARE_PREFIX(zip1(p13.VnB(), p4.VnB(), p12.VnB()),
- "zip1 p13.b, p4.b, p12.b");
- COMPARE_PREFIX(zip1(p13.VnH(), p4.VnH(), p12.VnH()),
- "zip1 p13.h, p4.h, p12.h");
- COMPARE_PREFIX(zip1(p13.VnS(), p4.VnS(), p12.VnS()),
- "zip1 p13.s, p4.s, p12.s");
- COMPARE_PREFIX(zip1(p13.VnD(), p4.VnD(), p12.VnD()),
- "zip1 p13.d, p4.d, p12.d");
- COMPARE_PREFIX(zip2(p1.VnB(), p15.VnB(), p2.VnB()), "zip2 p1.b, p15.b, p2.b");
- COMPARE_PREFIX(zip2(p1.VnH(), p15.VnH(), p2.VnH()), "zip2 p1.h, p15.h, p2.h");
- COMPARE_PREFIX(zip2(p1.VnS(), p15.VnS(), p2.VnS()), "zip2 p1.s, p15.s, p2.s");
- COMPARE_PREFIX(zip2(p1.VnD(), p15.VnD(), p2.VnD()), "zip2 p1.d, p15.d, p2.d");
- COMPARE_PREFIX(punpkhi(p12.VnH(), p6.VnB()), "punpkhi p12.h, p6.b");
- COMPARE_PREFIX(punpklo(p4.VnH(), p14.VnB()), "punpklo p4.h, p14.b");
+ COMPARE(rev(p15.VnB(), p6.VnB()), "rev p15.b, p6.b");
+ COMPARE(rev(p15.VnH(), p6.VnH()), "rev p15.h, p6.h");
+ COMPARE(rev(p15.VnS(), p6.VnS()), "rev p15.s, p6.s");
+ COMPARE(rev(p15.VnD(), p6.VnD()), "rev p15.d, p6.d");
+ COMPARE(trn1(p13.VnB(), p15.VnB(), p12.VnB()), "trn1 p13.b, p15.b, p12.b");
+ COMPARE(trn1(p13.VnH(), p15.VnH(), p12.VnH()), "trn1 p13.h, p15.h, p12.h");
+ COMPARE(trn1(p13.VnS(), p15.VnS(), p12.VnS()), "trn1 p13.s, p15.s, p12.s");
+ COMPARE(trn1(p13.VnD(), p15.VnD(), p12.VnD()), "trn1 p13.d, p15.d, p12.d");
+ COMPARE(trn2(p5.VnB(), p5.VnB(), p6.VnB()), "trn2 p5.b, p5.b, p6.b");
+ COMPARE(trn2(p5.VnH(), p5.VnH(), p6.VnH()), "trn2 p5.h, p5.h, p6.h");
+ COMPARE(trn2(p5.VnS(), p5.VnS(), p6.VnS()), "trn2 p5.s, p5.s, p6.s");
+ COMPARE(trn2(p5.VnD(), p5.VnD(), p6.VnD()), "trn2 p5.d, p5.d, p6.d");
+ COMPARE(uzp1(p14.VnB(), p4.VnB(), p14.VnB()), "uzp1 p14.b, p4.b, p14.b");
+ COMPARE(uzp1(p14.VnH(), p4.VnH(), p14.VnH()), "uzp1 p14.h, p4.h, p14.h");
+ COMPARE(uzp1(p14.VnS(), p4.VnS(), p14.VnS()), "uzp1 p14.s, p4.s, p14.s");
+ COMPARE(uzp1(p14.VnD(), p4.VnD(), p14.VnD()), "uzp1 p14.d, p4.d, p14.d");
+ COMPARE(uzp2(p6.VnB(), p11.VnB(), p2.VnB()), "uzp2 p6.b, p11.b, p2.b");
+ COMPARE(uzp2(p6.VnH(), p11.VnH(), p2.VnH()), "uzp2 p6.h, p11.h, p2.h");
+ COMPARE(uzp2(p6.VnS(), p11.VnS(), p2.VnS()), "uzp2 p6.s, p11.s, p2.s");
+ COMPARE(uzp2(p6.VnD(), p11.VnD(), p2.VnD()), "uzp2 p6.d, p11.d, p2.d");
+ COMPARE(zip1(p13.VnB(), p4.VnB(), p12.VnB()), "zip1 p13.b, p4.b, p12.b");
+ COMPARE(zip1(p13.VnH(), p4.VnH(), p12.VnH()), "zip1 p13.h, p4.h, p12.h");
+ COMPARE(zip1(p13.VnS(), p4.VnS(), p12.VnS()), "zip1 p13.s, p4.s, p12.s");
+ COMPARE(zip1(p13.VnD(), p4.VnD(), p12.VnD()), "zip1 p13.d, p4.d, p12.d");
+ COMPARE(zip2(p1.VnB(), p15.VnB(), p2.VnB()), "zip2 p1.b, p15.b, p2.b");
+ COMPARE(zip2(p1.VnH(), p15.VnH(), p2.VnH()), "zip2 p1.h, p15.h, p2.h");
+ COMPARE(zip2(p1.VnS(), p15.VnS(), p2.VnS()), "zip2 p1.s, p15.s, p2.s");
+ COMPARE(zip2(p1.VnD(), p15.VnD(), p2.VnD()), "zip2 p1.d, p15.d, p2.d");
+ COMPARE(punpkhi(p12.VnH(), p6.VnB()), "punpkhi p12.h, p6.b");
+ COMPARE(punpklo(p4.VnH(), p14.VnB()), "punpklo p4.h, p14.b");
CLEANUP();
}
@@ -5676,67 +5496,37 @@ TEST(sve_permute_vector_extract) {
"ext z2.b, z2.b, z10.b, #254");
COMPARE_MACRO(Ext(z2.VnB(), z2.VnB(), z10.VnB(), 255),
"ext z2.b, z2.b, z10.b, #255");
- COMPARE_MACRO(Ext(z2.VnB(), z4.VnB(), z10.VnB(), 127),
- "movprfx z2, z4\n"
- "ext z2.b, z2.b, z10.b, #127");
- COMPARE_MACRO(Ext(z2.VnB(), z12.VnB(), z2.VnB(), 2),
- "movprfx z31, z12\n"
- "ext z31.b, z31.b, z2.b, #2\n"
- "mov z2.d, z31.d");
+
CLEANUP();
}
TEST(sve_permute_vector_interleaving) {
SETUP();
- COMPARE_PREFIX(trn1(z25.VnB(), z31.VnB(), z17.VnB()),
- "trn1 z25.b, z31.b, z17.b");
- COMPARE_PREFIX(trn1(z25.VnH(), z31.VnH(), z17.VnH()),
- "trn1 z25.h, z31.h, z17.h");
- COMPARE_PREFIX(trn1(z25.VnS(), z31.VnS(), z17.VnS()),
- "trn1 z25.s, z31.s, z17.s");
- COMPARE_PREFIX(trn1(z25.VnD(), z31.VnD(), z17.VnD()),
- "trn1 z25.d, z31.d, z17.d");
- COMPARE_PREFIX(trn2(z23.VnB(), z19.VnB(), z5.VnB()),
- "trn2 z23.b, z19.b, z5.b");
- COMPARE_PREFIX(trn2(z23.VnH(), z19.VnH(), z5.VnH()),
- "trn2 z23.h, z19.h, z5.h");
- COMPARE_PREFIX(trn2(z23.VnS(), z19.VnS(), z5.VnS()),
- "trn2 z23.s, z19.s, z5.s");
- COMPARE_PREFIX(trn2(z23.VnD(), z19.VnD(), z5.VnD()),
- "trn2 z23.d, z19.d, z5.d");
- COMPARE_PREFIX(uzp1(z3.VnB(), z27.VnB(), z10.VnB()),
- "uzp1 z3.b, z27.b, z10.b");
- COMPARE_PREFIX(uzp1(z3.VnH(), z27.VnH(), z10.VnH()),
- "uzp1 z3.h, z27.h, z10.h");
- COMPARE_PREFIX(uzp1(z3.VnS(), z27.VnS(), z10.VnS()),
- "uzp1 z3.s, z27.s, z10.s");
- COMPARE_PREFIX(uzp1(z3.VnD(), z27.VnD(), z10.VnD()),
- "uzp1 z3.d, z27.d, z10.d");
- COMPARE_PREFIX(uzp2(z22.VnB(), z26.VnB(), z15.VnB()),
- "uzp2 z22.b, z26.b, z15.b");
- COMPARE_PREFIX(uzp2(z22.VnH(), z26.VnH(), z15.VnH()),
- "uzp2 z22.h, z26.h, z15.h");
- COMPARE_PREFIX(uzp2(z22.VnS(), z26.VnS(), z15.VnS()),
- "uzp2 z22.s, z26.s, z15.s");
- COMPARE_PREFIX(uzp2(z22.VnD(), z26.VnD(), z15.VnD()),
- "uzp2 z22.d, z26.d, z15.d");
- COMPARE_PREFIX(zip1(z31.VnB(), z2.VnB(), z20.VnB()),
- "zip1 z31.b, z2.b, z20.b");
- COMPARE_PREFIX(zip1(z31.VnH(), z2.VnH(), z20.VnH()),
- "zip1 z31.h, z2.h, z20.h");
- COMPARE_PREFIX(zip1(z31.VnS(), z2.VnS(), z20.VnS()),
- "zip1 z31.s, z2.s, z20.s");
- COMPARE_PREFIX(zip1(z31.VnD(), z2.VnD(), z20.VnD()),
- "zip1 z31.d, z2.d, z20.d");
- COMPARE_PREFIX(zip2(z15.VnB(), z23.VnB(), z12.VnB()),
- "zip2 z15.b, z23.b, z12.b");
- COMPARE_PREFIX(zip2(z15.VnH(), z23.VnH(), z12.VnH()),
- "zip2 z15.h, z23.h, z12.h");
- COMPARE_PREFIX(zip2(z15.VnS(), z23.VnS(), z12.VnS()),
- "zip2 z15.s, z23.s, z12.s");
- COMPARE_PREFIX(zip2(z15.VnD(), z23.VnD(), z12.VnD()),
- "zip2 z15.d, z23.d, z12.d");
+ COMPARE(trn1(z25.VnB(), z31.VnB(), z17.VnB()), "trn1 z25.b, z31.b, z17.b");
+ COMPARE(trn1(z25.VnH(), z31.VnH(), z17.VnH()), "trn1 z25.h, z31.h, z17.h");
+ COMPARE(trn1(z25.VnS(), z31.VnS(), z17.VnS()), "trn1 z25.s, z31.s, z17.s");
+ COMPARE(trn1(z25.VnD(), z31.VnD(), z17.VnD()), "trn1 z25.d, z31.d, z17.d");
+ COMPARE(trn2(z23.VnB(), z19.VnB(), z5.VnB()), "trn2 z23.b, z19.b, z5.b");
+ COMPARE(trn2(z23.VnH(), z19.VnH(), z5.VnH()), "trn2 z23.h, z19.h, z5.h");
+ COMPARE(trn2(z23.VnS(), z19.VnS(), z5.VnS()), "trn2 z23.s, z19.s, z5.s");
+ COMPARE(trn2(z23.VnD(), z19.VnD(), z5.VnD()), "trn2 z23.d, z19.d, z5.d");
+ COMPARE(uzp1(z3.VnB(), z27.VnB(), z10.VnB()), "uzp1 z3.b, z27.b, z10.b");
+ COMPARE(uzp1(z3.VnH(), z27.VnH(), z10.VnH()), "uzp1 z3.h, z27.h, z10.h");
+ COMPARE(uzp1(z3.VnS(), z27.VnS(), z10.VnS()), "uzp1 z3.s, z27.s, z10.s");
+ COMPARE(uzp1(z3.VnD(), z27.VnD(), z10.VnD()), "uzp1 z3.d, z27.d, z10.d");
+ COMPARE(uzp2(z22.VnB(), z26.VnB(), z15.VnB()), "uzp2 z22.b, z26.b, z15.b");
+ COMPARE(uzp2(z22.VnH(), z26.VnH(), z15.VnH()), "uzp2 z22.h, z26.h, z15.h");
+ COMPARE(uzp2(z22.VnS(), z26.VnS(), z15.VnS()), "uzp2 z22.s, z26.s, z15.s");
+ COMPARE(uzp2(z22.VnD(), z26.VnD(), z15.VnD()), "uzp2 z22.d, z26.d, z15.d");
+ COMPARE(zip1(z31.VnB(), z2.VnB(), z20.VnB()), "zip1 z31.b, z2.b, z20.b");
+ COMPARE(zip1(z31.VnH(), z2.VnH(), z20.VnH()), "zip1 z31.h, z2.h, z20.h");
+ COMPARE(zip1(z31.VnS(), z2.VnS(), z20.VnS()), "zip1 z31.s, z2.s, z20.s");
+ COMPARE(zip1(z31.VnD(), z2.VnD(), z20.VnD()), "zip1 z31.d, z2.d, z20.d");
+ COMPARE(zip2(z15.VnB(), z23.VnB(), z12.VnB()), "zip2 z15.b, z23.b, z12.b");
+ COMPARE(zip2(z15.VnH(), z23.VnH(), z12.VnH()), "zip2 z15.h, z23.h, z12.h");
+ COMPARE(zip2(z15.VnS(), z23.VnS(), z12.VnS()), "zip2 z15.s, z23.s, z12.s");
+ COMPARE(zip2(z15.VnD(), z23.VnD(), z12.VnD()), "zip2 z15.d, z23.d, z12.d");
CLEANUP();
}
@@ -5744,22 +5534,22 @@ TEST(sve_permute_vector_interleaving) {
TEST(sve_cpy_reg) {
SETUP();
- COMPARE_PREFIX(cpy(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
- COMPARE_PREFIX(cpy(z2.VnH(), p6.Merging(), w3), "mov z2.h, p6/m, w3");
- COMPARE_PREFIX(cpy(z3.VnS(), p7.Merging(), x5), "mov z3.s, p7/m, w5");
- COMPARE_PREFIX(cpy(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
- COMPARE_PREFIX(cpy(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp");
+ COMPARE(cpy(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
+ COMPARE(cpy(z2.VnH(), p6.Merging(), w3), "mov z2.h, p6/m, w3");
+ COMPARE(cpy(z3.VnS(), p7.Merging(), x5), "mov z3.s, p7/m, w5");
+ COMPARE(cpy(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
+ COMPARE(cpy(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp");
- COMPARE_PREFIX(cpy(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23");
- COMPARE_PREFIX(cpy(z27.VnH(), p3.Merging(), h23), "mov z27.h, p3/m, h23");
- COMPARE_PREFIX(cpy(z27.VnS(), p3.Merging(), s23), "mov z27.s, p3/m, s23");
- COMPARE_PREFIX(cpy(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23");
+ COMPARE(cpy(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23");
+ COMPARE(cpy(z27.VnH(), p3.Merging(), h23), "mov z27.h, p3/m, h23");
+ COMPARE(cpy(z27.VnS(), p3.Merging(), s23), "mov z27.s, p3/m, s23");
+ COMPARE(cpy(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23");
- COMPARE_PREFIX(mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
- COMPARE_PREFIX(mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
- COMPARE_PREFIX(mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp");
- COMPARE_PREFIX(mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23");
- COMPARE_PREFIX(mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23");
+ COMPARE(mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
+ COMPARE(mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
+ COMPARE(mov(z5.VnD(), p7.Merging(), sp), "mov z5.d, p7/m, sp");
+ COMPARE(mov(z27.VnB(), p3.Merging(), b23), "mov z27.b, p3/m, b23");
+ COMPARE(mov(z27.VnD(), p3.Merging(), d23), "mov z27.d, p3/m, d23");
COMPARE_MACRO(Mov(z1.VnB(), p2.Merging(), wsp), "mov z1.b, p2/m, wsp");
COMPARE_MACRO(Mov(z4.VnD(), p7.Merging(), x30), "mov z4.d, p7/m, x30");
@@ -5773,41 +5563,41 @@ TEST(sve_cpy_reg) {
TEST(sve_permute_vector_predicated) {
SETUP();
- COMPARE_PREFIX(compact(z13.VnS(), p7, z1.VnS()), "compact z13.s, p7, z1.s");
- COMPARE_PREFIX(compact(z13.VnD(), p7, z1.VnD()), "compact z13.d, p7, z1.d");
- COMPARE_PREFIX(splice(z7.VnB(), p6, z7.VnB(), z2.VnB()),
- "splice z7.b, p6, z7.b, z2.b");
- COMPARE_PREFIX(splice(z7.VnH(), p6, z7.VnH(), z2.VnH()),
- "splice z7.h, p6, z7.h, z2.h");
- COMPARE_PREFIX(splice(z7.VnS(), p6, z7.VnS(), z2.VnS()),
- "splice z7.s, p6, z7.s, z2.s");
- COMPARE_PREFIX(splice(z7.VnD(), p6, z7.VnD(), z2.VnD()),
- "splice z7.d, p6, z7.d, z2.d");
-
- COMPARE_MACRO(Splice(z0.VnB(), p1, z2.VnB(), z3.VnB()),
+ COMPARE(compact(z13.VnS(), p7, z1.VnS()), "compact z13.s, p7, z1.s");
+ COMPARE(compact(z13.VnD(), p7, z1.VnD()), "compact z13.d, p7, z1.d");
+ COMPARE(splice(z7.VnB(), p6, z7.VnB(), z2.VnB()),
+ "splice z7.b, p6, z7.b, z2.b");
+ COMPARE(splice(z7.VnH(), p6, z7.VnH(), z2.VnH()),
+ "splice z7.h, p6, z7.h, z2.h");
+ COMPARE(splice(z7.VnS(), p6, z7.VnS(), z2.VnS()),
+ "splice z7.s, p6, z7.s, z2.s");
+ COMPARE(splice(z7.VnD(), p6, z7.VnD(), z2.VnD()),
+ "splice z7.d, p6, z7.d, z2.d");
+
+ COMPARE_MACRO(Splice(z0.VnB(), p1, z2.VnB(), z4.VnB()),
"movprfx z0, z2\n"
- "splice z0.b, p1, z0.b, z3.b");
+ "splice z0.b, p1, z0.b, z4.b");
COMPARE_MACRO(Splice(z0.VnH(), p1, z2.VnH(), z0.VnH()),
"movprfx z31, z2\n"
"splice z31.h, p1, z31.h, z0.h\n"
"mov z0.d, z31.d");
- COMPARE_PREFIX(clasta(z4.VnB(), p2, z4.VnB(), z12.VnB()),
- "clasta z4.b, p2, z4.b, z12.b");
- COMPARE_PREFIX(clasta(z4.VnH(), p2, z4.VnH(), z12.VnH()),
- "clasta z4.h, p2, z4.h, z12.h");
- COMPARE_PREFIX(clasta(z4.VnS(), p2, z4.VnS(), z12.VnS()),
- "clasta z4.s, p2, z4.s, z12.s");
- COMPARE_PREFIX(clasta(z4.VnD(), p2, z4.VnD(), z12.VnD()),
- "clasta z4.d, p2, z4.d, z12.d");
- COMPARE_PREFIX(clastb(z29.VnB(), p7, z29.VnB(), z26.VnB()),
- "clastb z29.b, p7, z29.b, z26.b");
- COMPARE_PREFIX(clastb(z29.VnH(), p7, z29.VnH(), z26.VnH()),
- "clastb z29.h, p7, z29.h, z26.h");
- COMPARE_PREFIX(clastb(z29.VnS(), p7, z29.VnS(), z26.VnS()),
- "clastb z29.s, p7, z29.s, z26.s");
- COMPARE_PREFIX(clastb(z29.VnD(), p7, z29.VnD(), z26.VnD()),
- "clastb z29.d, p7, z29.d, z26.d");
+ COMPARE(clasta(z4.VnB(), p2, z4.VnB(), z12.VnB()),
+ "clasta z4.b, p2, z4.b, z12.b");
+ COMPARE(clasta(z4.VnH(), p2, z4.VnH(), z12.VnH()),
+ "clasta z4.h, p2, z4.h, z12.h");
+ COMPARE(clasta(z4.VnS(), p2, z4.VnS(), z12.VnS()),
+ "clasta z4.s, p2, z4.s, z12.s");
+ COMPARE(clasta(z4.VnD(), p2, z4.VnD(), z12.VnD()),
+ "clasta z4.d, p2, z4.d, z12.d");
+ COMPARE(clastb(z29.VnB(), p7, z29.VnB(), z26.VnB()),
+ "clastb z29.b, p7, z29.b, z26.b");
+ COMPARE(clastb(z29.VnH(), p7, z29.VnH(), z26.VnH()),
+ "clastb z29.h, p7, z29.h, z26.h");
+ COMPARE(clastb(z29.VnS(), p7, z29.VnS(), z26.VnS()),
+ "clastb z29.s, p7, z29.s, z26.s");
+ COMPARE(clastb(z29.VnD(), p7, z29.VnD(), z26.VnD()),
+ "clastb z29.d, p7, z29.d, z26.d");
COMPARE_MACRO(Clasta(z5.VnD(), p2, z4.VnD(), z12.VnD()),
"movprfx z5, z4\n"
@@ -5822,41 +5612,41 @@ TEST(sve_permute_vector_predicated) {
COMPARE_MACRO(Clastb(z1.VnS(), p1, z1.VnS(), z1.VnS()),
"clastb z1.s, p1, z1.s, z1.s");
- COMPARE_PREFIX(clasta(w6, p0, w6, z13.VnB()), "clasta w6, p0, w6, z13.b");
- COMPARE_PREFIX(clasta(w6, p0, w6, z13.VnH()), "clasta w6, p0, w6, z13.h");
- COMPARE_PREFIX(clasta(w6, p0, w6, z13.VnS()), "clasta w6, p0, w6, z13.s");
- COMPARE_PREFIX(clasta(x6, p0, x6, z13.VnD()), "clasta x6, p0, x6, z13.d");
- COMPARE_PREFIX(clastb(w21, p2, w21, z27.VnB()), "clastb w21, p2, w21, z27.b");
- COMPARE_PREFIX(clastb(w21, p2, w21, z27.VnH()), "clastb w21, p2, w21, z27.h");
- COMPARE_PREFIX(clastb(w21, p2, w21, z27.VnS()), "clastb w21, p2, w21, z27.s");
- COMPARE_PREFIX(clastb(x21, p2, x21, z27.VnD()), "clastb x21, p2, x21, z27.d");
-
- COMPARE_PREFIX(clasta(b8, p6, b8, z7.VnB()), "clasta b8, p6, b8, z7.b");
- COMPARE_PREFIX(clasta(h8, p6, h8, z7.VnH()), "clasta h8, p6, h8, z7.h");
- COMPARE_PREFIX(clasta(s8, p6, s8, z7.VnS()), "clasta s8, p6, s8, z7.s");
- COMPARE_PREFIX(clasta(d8, p6, d8, z7.VnD()), "clasta d8, p6, d8, z7.d");
- COMPARE_PREFIX(clastb(b17, p0, b17, z19.VnB()), "clastb b17, p0, b17, z19.b");
- COMPARE_PREFIX(clastb(h17, p0, h17, z19.VnH()), "clastb h17, p0, h17, z19.h");
- COMPARE_PREFIX(clastb(s17, p0, s17, z19.VnS()), "clastb s17, p0, s17, z19.s");
- COMPARE_PREFIX(clastb(d17, p0, d17, z19.VnD()), "clastb d17, p0, d17, z19.d");
-
- COMPARE_PREFIX(lasta(w15, p3, z3.VnB()), "lasta w15, p3, z3.b");
- COMPARE_PREFIX(lasta(w15, p3, z3.VnH()), "lasta w15, p3, z3.h");
- COMPARE_PREFIX(lasta(w15, p3, z3.VnS()), "lasta w15, p3, z3.s");
- COMPARE_PREFIX(lasta(x15, p3, z3.VnD()), "lasta x15, p3, z3.d");
- COMPARE_PREFIX(lasta(b30, p4, z24.VnB()), "lasta b30, p4, z24.b");
- COMPARE_PREFIX(lasta(h30, p4, z24.VnH()), "lasta h30, p4, z24.h");
- COMPARE_PREFIX(lasta(s30, p4, z24.VnS()), "lasta s30, p4, z24.s");
- COMPARE_PREFIX(lasta(d30, p4, z24.VnD()), "lasta d30, p4, z24.d");
-
- COMPARE_PREFIX(lastb(w9, p2, z16.VnB()), "lastb w9, p2, z16.b");
- COMPARE_PREFIX(lastb(w9, p2, z16.VnH()), "lastb w9, p2, z16.h");
- COMPARE_PREFIX(lastb(w9, p2, z16.VnS()), "lastb w9, p2, z16.s");
- COMPARE_PREFIX(lastb(x9, p2, z16.VnD()), "lastb x9, p2, z16.d");
- COMPARE_PREFIX(lastb(b14, p5, z2.VnB()), "lastb b14, p5, z2.b");
- COMPARE_PREFIX(lastb(h14, p5, z2.VnH()), "lastb h14, p5, z2.h");
- COMPARE_PREFIX(lastb(s14, p5, z2.VnS()), "lastb s14, p5, z2.s");
- COMPARE_PREFIX(lastb(d14, p5, z2.VnD()), "lastb d14, p5, z2.d");
+ COMPARE(clasta(w6, p0, w6, z13.VnB()), "clasta w6, p0, w6, z13.b");
+ COMPARE(clasta(w6, p0, w6, z13.VnH()), "clasta w6, p0, w6, z13.h");
+ COMPARE(clasta(w6, p0, w6, z13.VnS()), "clasta w6, p0, w6, z13.s");
+ COMPARE(clasta(x6, p0, x6, z13.VnD()), "clasta x6, p0, x6, z13.d");
+ COMPARE(clastb(w21, p2, w21, z27.VnB()), "clastb w21, p2, w21, z27.b");
+ COMPARE(clastb(w21, p2, w21, z27.VnH()), "clastb w21, p2, w21, z27.h");
+ COMPARE(clastb(w21, p2, w21, z27.VnS()), "clastb w21, p2, w21, z27.s");
+ COMPARE(clastb(x21, p2, x21, z27.VnD()), "clastb x21, p2, x21, z27.d");
+
+ COMPARE(clasta(b8, p6, b8, z7.VnB()), "clasta b8, p6, b8, z7.b");
+ COMPARE(clasta(h8, p6, h8, z7.VnH()), "clasta h8, p6, h8, z7.h");
+ COMPARE(clasta(s8, p6, s8, z7.VnS()), "clasta s8, p6, s8, z7.s");
+ COMPARE(clasta(d8, p6, d8, z7.VnD()), "clasta d8, p6, d8, z7.d");
+ COMPARE(clastb(b17, p0, b17, z19.VnB()), "clastb b17, p0, b17, z19.b");
+ COMPARE(clastb(h17, p0, h17, z19.VnH()), "clastb h17, p0, h17, z19.h");
+ COMPARE(clastb(s17, p0, s17, z19.VnS()), "clastb s17, p0, s17, z19.s");
+ COMPARE(clastb(d17, p0, d17, z19.VnD()), "clastb d17, p0, d17, z19.d");
+
+ COMPARE(lasta(w15, p3, z3.VnB()), "lasta w15, p3, z3.b");
+ COMPARE(lasta(w15, p3, z3.VnH()), "lasta w15, p3, z3.h");
+ COMPARE(lasta(w15, p3, z3.VnS()), "lasta w15, p3, z3.s");
+ COMPARE(lasta(x15, p3, z3.VnD()), "lasta x15, p3, z3.d");
+ COMPARE(lasta(b30, p4, z24.VnB()), "lasta b30, p4, z24.b");
+ COMPARE(lasta(h30, p4, z24.VnH()), "lasta h30, p4, z24.h");
+ COMPARE(lasta(s30, p4, z24.VnS()), "lasta s30, p4, z24.s");
+ COMPARE(lasta(d30, p4, z24.VnD()), "lasta d30, p4, z24.d");
+
+ COMPARE(lastb(w9, p2, z16.VnB()), "lastb w9, p2, z16.b");
+ COMPARE(lastb(w9, p2, z16.VnH()), "lastb w9, p2, z16.h");
+ COMPARE(lastb(w9, p2, z16.VnS()), "lastb w9, p2, z16.s");
+ COMPARE(lastb(x9, p2, z16.VnD()), "lastb x9, p2, z16.d");
+ COMPARE(lastb(b14, p5, z2.VnB()), "lastb b14, p5, z2.b");
+ COMPARE(lastb(h14, p5, z2.VnH()), "lastb h14, p5, z2.h");
+ COMPARE(lastb(s14, p5, z2.VnS()), "lastb s14, p5, z2.s");
+ COMPARE(lastb(d14, p5, z2.VnD()), "lastb d14, p5, z2.d");
CLEANUP();
}
@@ -5891,19 +5681,19 @@ TEST(sve_reverse) {
TEST(sve_permute_vector_unpredicated) {
SETUP();
- COMPARE_PREFIX(dup(z4.VnB(), w7), "mov z4.b, w7");
- COMPARE_PREFIX(dup(z5.VnH(), w6), "mov z5.h, w6");
- COMPARE_PREFIX(dup(z6.VnS(), sp), "mov z6.s, wsp");
- COMPARE_PREFIX(dup(z7.VnD(), x4), "mov z7.d, x4");
- COMPARE_PREFIX(dup(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
- COMPARE_PREFIX(dup(z26.VnH(), z12.VnH(), 0), "mov z26.h, h12");
+ COMPARE(dup(z4.VnB(), w7), "mov z4.b, w7");
+ COMPARE(dup(z5.VnH(), w6), "mov z5.h, w6");
+ COMPARE(dup(z6.VnS(), sp), "mov z6.s, wsp");
+ COMPARE(dup(z7.VnD(), x4), "mov z7.d, x4");
+ COMPARE(dup(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
+ COMPARE(dup(z26.VnH(), z12.VnH(), 0), "mov z26.h, h12");
- COMPARE_PREFIX(mov(z4.VnB(), w7), "mov z4.b, w7");
- COMPARE_PREFIX(mov(z5.VnH(), w6), "mov z5.h, w6");
- COMPARE_PREFIX(mov(z6.VnS(), sp), "mov z6.s, wsp");
- COMPARE_PREFIX(mov(z7.VnD(), x4), "mov z7.d, x4");
- COMPARE_PREFIX(mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
- COMPARE_PREFIX(mov(z0.VnS(), s1), "mov z0.s, s1");
+ COMPARE(mov(z4.VnB(), w7), "mov z4.b, w7");
+ COMPARE(mov(z5.VnH(), w6), "mov z5.h, w6");
+ COMPARE(mov(z6.VnS(), sp), "mov z6.s, wsp");
+ COMPARE(mov(z7.VnD(), x4), "mov z7.d, x4");
+ COMPARE(mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
+ COMPARE(mov(z0.VnS(), s1), "mov z0.s, s1");
COMPARE_MACRO(Mov(z7.VnD(), x4), "mov z7.d, x4");
COMPARE_MACRO(Mov(z25.VnQ(), z28.VnQ(), 2), "mov z25.q, z28.q[2]");
@@ -5921,30 +5711,26 @@ TEST(sve_permute_vector_unpredicated) {
COMPARE(insr(z6.VnH(), h15), "insr z6.h, h15");
COMPARE(insr(z7.VnS(), s22), "insr z7.s, s22");
COMPARE(insr(z8.VnD(), d30), "insr z8.d, d30");
- COMPARE_PREFIX(rev(z13.VnB(), z10.VnB()), "rev z13.b, z10.b");
- COMPARE_PREFIX(rev(z14.VnH(), z10.VnH()), "rev z14.h, z10.h");
- COMPARE_PREFIX(rev(z15.VnS(), z10.VnS()), "rev z15.s, z10.s");
- COMPARE_PREFIX(rev(z16.VnD(), z10.VnD()), "rev z16.d, z10.d");
- COMPARE_PREFIX(sunpkhi(z10.VnH(), z11.VnB()), "sunpkhi z10.h, z11.b");
- COMPARE_PREFIX(sunpkhi(z11.VnS(), z11.VnH()), "sunpkhi z11.s, z11.h");
- COMPARE_PREFIX(sunpkhi(z12.VnD(), z11.VnS()), "sunpkhi z12.d, z11.s");
- COMPARE_PREFIX(sunpklo(z20.VnH(), z12.VnB()), "sunpklo z20.h, z12.b");
- COMPARE_PREFIX(sunpklo(z21.VnS(), z12.VnH()), "sunpklo z21.s, z12.h");
- COMPARE_PREFIX(sunpklo(z22.VnD(), z12.VnS()), "sunpklo z22.d, z12.s");
- COMPARE_PREFIX(uunpkhi(z17.VnH(), z14.VnB()), "uunpkhi z17.h, z14.b");
- COMPARE_PREFIX(uunpkhi(z18.VnS(), z14.VnH()), "uunpkhi z18.s, z14.h");
- COMPARE_PREFIX(uunpkhi(z19.VnD(), z14.VnS()), "uunpkhi z19.d, z14.s");
- COMPARE_PREFIX(uunpklo(z27.VnH(), z6.VnB()), "uunpklo z27.h, z6.b");
- COMPARE_PREFIX(uunpklo(z28.VnS(), z6.VnH()), "uunpklo z28.s, z6.h");
- COMPARE_PREFIX(uunpklo(z29.VnD(), z6.VnS()), "uunpklo z29.d, z6.s");
- COMPARE_PREFIX(tbl(z24.VnB(), z29.VnB(), z0.VnB()),
- "tbl z24.b, {z29.b}, z0.b");
- COMPARE_PREFIX(tbl(z25.VnH(), z29.VnH(), z1.VnH()),
- "tbl z25.h, {z29.h}, z1.h");
- COMPARE_PREFIX(tbl(z26.VnS(), z29.VnS(), z2.VnS()),
- "tbl z26.s, {z29.s}, z2.s");
- COMPARE_PREFIX(tbl(z27.VnD(), z29.VnD(), z3.VnD()),
- "tbl z27.d, {z29.d}, z3.d");
+ COMPARE(rev(z13.VnB(), z10.VnB()), "rev z13.b, z10.b");
+ COMPARE(rev(z14.VnH(), z10.VnH()), "rev z14.h, z10.h");
+ COMPARE(rev(z15.VnS(), z10.VnS()), "rev z15.s, z10.s");
+ COMPARE(rev(z16.VnD(), z10.VnD()), "rev z16.d, z10.d");
+ COMPARE(sunpkhi(z10.VnH(), z11.VnB()), "sunpkhi z10.h, z11.b");
+ COMPARE(sunpkhi(z11.VnS(), z11.VnH()), "sunpkhi z11.s, z11.h");
+ COMPARE(sunpkhi(z12.VnD(), z11.VnS()), "sunpkhi z12.d, z11.s");
+ COMPARE(sunpklo(z20.VnH(), z12.VnB()), "sunpklo z20.h, z12.b");
+ COMPARE(sunpklo(z21.VnS(), z12.VnH()), "sunpklo z21.s, z12.h");
+ COMPARE(sunpklo(z22.VnD(), z12.VnS()), "sunpklo z22.d, z12.s");
+ COMPARE(uunpkhi(z17.VnH(), z14.VnB()), "uunpkhi z17.h, z14.b");
+ COMPARE(uunpkhi(z18.VnS(), z14.VnH()), "uunpkhi z18.s, z14.h");
+ COMPARE(uunpkhi(z19.VnD(), z14.VnS()), "uunpkhi z19.d, z14.s");
+ COMPARE(uunpklo(z27.VnH(), z6.VnB()), "uunpklo z27.h, z6.b");
+ COMPARE(uunpklo(z28.VnS(), z6.VnH()), "uunpklo z28.s, z6.h");
+ COMPARE(uunpklo(z29.VnD(), z6.VnS()), "uunpklo z29.d, z6.s");
+ COMPARE(tbl(z24.VnB(), z29.VnB(), z0.VnB()), "tbl z24.b, {z29.b}, z0.b");
+ COMPARE(tbl(z25.VnH(), z29.VnH(), z1.VnH()), "tbl z25.h, {z29.h}, z1.h");
+ COMPARE(tbl(z26.VnS(), z29.VnS(), z2.VnS()), "tbl z26.s, {z29.s}, z2.s");
+ COMPARE(tbl(z27.VnD(), z29.VnD(), z3.VnD()), "tbl z27.d, {z29.d}, z3.d");
CLEANUP();
}
@@ -5952,10 +5738,10 @@ TEST(sve_permute_vector_unpredicated) {
TEST(sve_predicate_count) {
SETUP();
- COMPARE_PREFIX(cntp(x9, p1, p0.VnB()), "cntp x9, p1, p0.b");
- COMPARE_PREFIX(cntp(x10, p12, p1.VnH()), "cntp x10, p12, p1.h");
- COMPARE_PREFIX(cntp(x11, p13, p14.VnS()), "cntp x11, p13, p14.s");
- COMPARE_PREFIX(cntp(x12, p4, p15.VnD()), "cntp x12, p4, p15.d");
+ COMPARE(cntp(x9, p1, p0.VnB()), "cntp x9, p1, p0.b");
+ COMPARE(cntp(x10, p12, p1.VnH()), "cntp x10, p12, p1.h");
+ COMPARE(cntp(x11, p13, p14.VnS()), "cntp x11, p13, p14.s");
+ COMPARE(cntp(x12, p4, p15.VnD()), "cntp x12, p4, p15.d");
COMPARE_MACRO(Cntp(x0, p1, p2.VnB()), "cntp x0, p1, p2.b");
COMPARE_MACRO(Cntp(w10, p11, p12.VnH()), "cntp x10, p11, p12.h");
@@ -5966,61 +5752,56 @@ TEST(sve_predicate_count) {
TEST(sve_predicate_logical_op) {
SETUP();
- COMPARE_PREFIX(ands(p13.VnB(), p9.Zeroing(), p5.VnB(), p15.VnB()),
- "ands p13.b, p9/z, p5.b, p15.b");
- COMPARE_PREFIX(and_(p9.VnB(), p3.Zeroing(), p0.VnB(), p14.VnB()),
- "and p9.b, p3/z, p0.b, p14.b");
- COMPARE_PREFIX(bics(p8.VnB(), p5.Zeroing(), p3.VnB(), p1.VnB()),
- "bics p8.b, p5/z, p3.b, p1.b");
- COMPARE_PREFIX(bic(p5.VnB(), p5.Zeroing(), p9.VnB(), p9.VnB()),
- "bic p5.b, p5/z, p9.b, p9.b");
- COMPARE_PREFIX(eors(p11.VnB(), p1.Zeroing(), p1.VnB(), p2.VnB()),
- "eors p11.b, p1/z, p1.b, p2.b");
- COMPARE_PREFIX(eor(p8.VnB(), p6.Zeroing(), p1.VnB(), p11.VnB()),
- "eor p8.b, p6/z, p1.b, p11.b");
- COMPARE_PREFIX(nands(p13.VnB(), p0.Zeroing(), p9.VnB(), p4.VnB()),
- "nands p13.b, p0/z, p9.b, p4.b");
- COMPARE_PREFIX(nand(p7.VnB(), p7.Zeroing(), p15.VnB(), p2.VnB()),
- "nand p7.b, p7/z, p15.b, p2.b");
- COMPARE_PREFIX(nors(p8.VnB(), p8.Zeroing(), p12.VnB(), p11.VnB()),
- "nors p8.b, p8/z, p12.b, p11.b");
- COMPARE_PREFIX(nor(p3.VnB(), p6.Zeroing(), p15.VnB(), p12.VnB()),
- "nor p3.b, p6/z, p15.b, p12.b");
- COMPARE_PREFIX(orns(p10.VnB(), p11.Zeroing(), p0.VnB(), p15.VnB()),
- "orns p10.b, p11/z, p0.b, p15.b");
- COMPARE_PREFIX(orn(p0.VnB(), p1.Zeroing(), p7.VnB(), p4.VnB()),
- "orn p0.b, p1/z, p7.b, p4.b");
- COMPARE_PREFIX(orrs(p14.VnB(), p6.Zeroing(), p1.VnB(), p5.VnB()),
- "orrs p14.b, p6/z, p1.b, p5.b");
- COMPARE_PREFIX(orr(p13.VnB(), p7.Zeroing(), p10.VnB(), p4.VnB()),
- "orr p13.b, p7/z, p10.b, p4.b");
- COMPARE_PREFIX(sel(p9.VnB(), p15, p15.VnB(), p7.VnB()),
- "sel p9.b, p15, p15.b, p7.b");
+ COMPARE(ands(p13.VnB(), p9.Zeroing(), p5.VnB(), p15.VnB()),
+ "ands p13.b, p9/z, p5.b, p15.b");
+ COMPARE(and_(p9.VnB(), p3.Zeroing(), p0.VnB(), p14.VnB()),
+ "and p9.b, p3/z, p0.b, p14.b");
+ COMPARE(bics(p8.VnB(), p5.Zeroing(), p3.VnB(), p1.VnB()),
+ "bics p8.b, p5/z, p3.b, p1.b");
+ COMPARE(bic(p5.VnB(), p5.Zeroing(), p9.VnB(), p9.VnB()),
+ "bic p5.b, p5/z, p9.b, p9.b");
+ COMPARE(eors(p11.VnB(), p1.Zeroing(), p1.VnB(), p2.VnB()),
+ "eors p11.b, p1/z, p1.b, p2.b");
+ COMPARE(eor(p8.VnB(), p6.Zeroing(), p1.VnB(), p11.VnB()),
+ "eor p8.b, p6/z, p1.b, p11.b");
+ COMPARE(nands(p13.VnB(), p0.Zeroing(), p9.VnB(), p4.VnB()),
+ "nands p13.b, p0/z, p9.b, p4.b");
+ COMPARE(nand(p7.VnB(), p7.Zeroing(), p15.VnB(), p2.VnB()),
+ "nand p7.b, p7/z, p15.b, p2.b");
+ COMPARE(nors(p8.VnB(), p8.Zeroing(), p12.VnB(), p11.VnB()),
+ "nors p8.b, p8/z, p12.b, p11.b");
+ COMPARE(nor(p3.VnB(), p6.Zeroing(), p15.VnB(), p12.VnB()),
+ "nor p3.b, p6/z, p15.b, p12.b");
+ COMPARE(orns(p10.VnB(), p11.Zeroing(), p0.VnB(), p15.VnB()),
+ "orns p10.b, p11/z, p0.b, p15.b");
+ COMPARE(orn(p0.VnB(), p1.Zeroing(), p7.VnB(), p4.VnB()),
+ "orn p0.b, p1/z, p7.b, p4.b");
+ COMPARE(orrs(p14.VnB(), p6.Zeroing(), p1.VnB(), p5.VnB()),
+ "orrs p14.b, p6/z, p1.b, p5.b");
+ COMPARE(orr(p13.VnB(), p7.Zeroing(), p10.VnB(), p4.VnB()),
+ "orr p13.b, p7/z, p10.b, p4.b");
+ COMPARE(sel(p9.VnB(), p15, p15.VnB(), p7.VnB()),
+ "sel p9.b, p15, p15.b, p7.b");
// Aliases.
- COMPARE_PREFIX(eor(p7.VnB(), p6.Zeroing(), p1.VnB(), p6.VnB()),
- "not p7.b, p6/z, p1.b");
- COMPARE_PREFIX(not_(p7.VnB(), p6.Zeroing(), p1.VnB()),
- "not p7.b, p6/z, p1.b");
- COMPARE_PREFIX(eors(p6.VnB(), p5.Zeroing(), p2.VnB(), p5.VnB()),
- "nots p6.b, p5/z, p2.b");
- COMPARE_PREFIX(nots(p6.VnB(), p5.Zeroing(), p2.VnB()),
- "nots p6.b, p5/z, p2.b");
- COMPARE_PREFIX(ands(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()),
- "movs p5.b, p4/z, p3.b");
- COMPARE_PREFIX(movs(p5.VnB(), p4.Zeroing(), p3.VnB()),
- "movs p5.b, p4/z, p3.b");
- COMPARE_PREFIX(and_(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()),
- "mov p5.b, p4/z, p3.b");
- COMPARE_PREFIX(mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b");
- COMPARE_PREFIX(orrs(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()),
- "movs p4.b, p3.b");
- COMPARE_PREFIX(movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b");
- COMPARE_PREFIX(orr(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()),
- "mov p4.b, p3.b");
- COMPARE_PREFIX(mov(p4.VnB(), p3.VnB()), "mov p4.b, p3.b");
- COMPARE_PREFIX(sel(p3.VnB(), p2, p4.VnB(), p3.VnB()), "mov p3.b, p2/m, p4.b");
- COMPARE_PREFIX(mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b");
+ COMPARE(eor(p7.VnB(), p6.Zeroing(), p1.VnB(), p6.VnB()),
+ "not p7.b, p6/z, p1.b");
+ COMPARE(not_(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b");
+ COMPARE(eors(p6.VnB(), p5.Zeroing(), p2.VnB(), p5.VnB()),
+ "nots p6.b, p5/z, p2.b");
+ COMPARE(nots(p6.VnB(), p5.Zeroing(), p2.VnB()), "nots p6.b, p5/z, p2.b");
+ COMPARE(ands(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()),
+ "movs p5.b, p4/z, p3.b");
+ COMPARE(movs(p5.VnB(), p4.Zeroing(), p3.VnB()), "movs p5.b, p4/z, p3.b");
+ COMPARE(and_(p5.VnB(), p4.Zeroing(), p3.VnB(), p3.VnB()),
+ "mov p5.b, p4/z, p3.b");
+ COMPARE(mov(p5.VnB(), p4.Zeroing(), p3.VnB()), "mov p5.b, p4/z, p3.b");
+ COMPARE(orrs(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), "movs p4.b, p3.b");
+ COMPARE(movs(p4.VnB(), p3.VnB()), "movs p4.b, p3.b");
+ COMPARE(orr(p4.VnB(), p3.Zeroing(), p3.VnB(), p3.VnB()), "mov p4.b, p3.b");
+ COMPARE(mov(p4.VnB(), p3.VnB()), "mov p4.b, p3.b");
+ COMPARE(sel(p3.VnB(), p2, p4.VnB(), p3.VnB()), "mov p3.b, p2/m, p4.b");
+ COMPARE(mov(p3.VnB(), p2.Merging(), p4.VnB()), "mov p3.b, p2/m, p4.b");
COMPARE_MACRO(Not(p7.VnB(), p6.Zeroing(), p1.VnB()), "not p7.b, p6/z, p1.b");
COMPARE_MACRO(Nots(p6.VnB(), p5.Zeroing(), p2.VnB()),
@@ -6038,8 +5819,8 @@ TEST(sve_predicate_logical_op) {
TEST(sve_predicate_first_active) {
SETUP();
- COMPARE_PREFIX(pfirst(p0.VnB(), p7, p0.VnB()), "pfirst p0.b, p7, p0.b");
- COMPARE_PREFIX(pfirst(p7.VnB(), p0, p7.VnB()), "pfirst p7.b, p0, p7.b");
+ COMPARE(pfirst(p0.VnB(), p7, p0.VnB()), "pfirst p0.b, p7, p0.b");
+ COMPARE(pfirst(p7.VnB(), p0, p7.VnB()), "pfirst p7.b, p0, p7.b");
COMPARE_MACRO(Pfirst(p1.VnB(), p2, p1.VnB()), "pfirst p1.b, p2, p1.b");
COMPARE_MACRO(Pfirst(p3.VnB(), p4, p5.VnB()),
@@ -6061,15 +5842,15 @@ TEST(sve_predicate_first_active) {
TEST(sve_predicate_next_active) {
SETUP();
- COMPARE_PREFIX(pnext(p0.VnB(), p8, p0.VnB()), "pnext p0.b, p8, p0.b");
- COMPARE_PREFIX(pnext(p1.VnH(), p9, p1.VnH()), "pnext p1.h, p9, p1.h");
- COMPARE_PREFIX(pnext(p2.VnS(), p10, p2.VnS()), "pnext p2.s, p10, p2.s");
- COMPARE_PREFIX(pnext(p3.VnD(), p11, p3.VnD()), "pnext p3.d, p11, p3.d");
+ COMPARE(pnext(p0.VnB(), p8, p0.VnB()), "pnext p0.b, p8, p0.b");
+ COMPARE(pnext(p1.VnH(), p9, p1.VnH()), "pnext p1.h, p9, p1.h");
+ COMPARE(pnext(p2.VnS(), p10, p2.VnS()), "pnext p2.s, p10, p2.s");
+ COMPARE(pnext(p3.VnD(), p11, p3.VnD()), "pnext p3.d, p11, p3.d");
- COMPARE_PREFIX(pnext(p12.VnB(), p4, p12.VnB()), "pnext p12.b, p4, p12.b");
- COMPARE_PREFIX(pnext(p13.VnH(), p5, p13.VnH()), "pnext p13.h, p5, p13.h");
- COMPARE_PREFIX(pnext(p14.VnS(), p6, p14.VnS()), "pnext p14.s, p6, p14.s");
- COMPARE_PREFIX(pnext(p15.VnD(), p7, p15.VnD()), "pnext p15.d, p7, p15.d");
+ COMPARE(pnext(p12.VnB(), p4, p12.VnB()), "pnext p12.b, p4, p12.b");
+ COMPARE(pnext(p13.VnH(), p5, p13.VnH()), "pnext p13.h, p5, p13.h");
+ COMPARE(pnext(p14.VnS(), p6, p14.VnS()), "pnext p14.s, p6, p14.s");
+ COMPARE(pnext(p15.VnD(), p7, p15.VnD()), "pnext p15.d, p7, p15.d");
COMPARE_MACRO(Pnext(p5.VnB(), p9, p5.VnB()), "pnext p5.b, p9, p5.b");
COMPARE_MACRO(Pnext(p6.VnH(), p8, p6.VnH()), "pnext p6.h, p8, p6.h");
@@ -6117,44 +5898,44 @@ TEST(sve_predicate_initialize) {
SETUP();
// Basic forms.
- COMPARE_PREFIX(ptrue(p0.VnB(), SVE_POW2), "ptrue p0.b, pow2");
- COMPARE_PREFIX(ptrue(p1.VnH(), SVE_VL1), "ptrue p1.h, vl1");
- COMPARE_PREFIX(ptrue(p2.VnS(), SVE_VL8), "ptrue p2.s, vl8");
- COMPARE_PREFIX(ptrue(p3.VnD(), SVE_VL16), "ptrue p3.d, vl16");
- COMPARE_PREFIX(ptrue(p4.VnB(), SVE_VL256), "ptrue p4.b, vl256");
- COMPARE_PREFIX(ptrue(p5.VnH(), SVE_MUL3), "ptrue p5.h, mul3");
- COMPARE_PREFIX(ptrue(p6.VnS(), SVE_MUL4), "ptrue p6.s, mul4");
- COMPARE_PREFIX(ptrue(p7.VnD(), SVE_ALL), "ptrue p7.d");
-
- COMPARE_PREFIX(ptrues(p8.VnB(), SVE_ALL), "ptrues p8.b");
- COMPARE_PREFIX(ptrues(p9.VnH(), SVE_MUL4), "ptrues p9.h, mul4");
- COMPARE_PREFIX(ptrues(p10.VnS(), SVE_MUL3), "ptrues p10.s, mul3");
- COMPARE_PREFIX(ptrues(p11.VnD(), SVE_VL256), "ptrues p11.d, vl256");
- COMPARE_PREFIX(ptrues(p12.VnB(), SVE_VL16), "ptrues p12.b, vl16");
- COMPARE_PREFIX(ptrues(p13.VnH(), SVE_VL8), "ptrues p13.h, vl8");
- COMPARE_PREFIX(ptrues(p14.VnS(), SVE_VL1), "ptrues p14.s, vl1");
- COMPARE_PREFIX(ptrues(p15.VnD(), SVE_POW2), "ptrues p15.d, pow2");
+ COMPARE(ptrue(p0.VnB(), SVE_POW2), "ptrue p0.b, pow2");
+ COMPARE(ptrue(p1.VnH(), SVE_VL1), "ptrue p1.h, vl1");
+ COMPARE(ptrue(p2.VnS(), SVE_VL8), "ptrue p2.s, vl8");
+ COMPARE(ptrue(p3.VnD(), SVE_VL16), "ptrue p3.d, vl16");
+ COMPARE(ptrue(p4.VnB(), SVE_VL256), "ptrue p4.b, vl256");
+ COMPARE(ptrue(p5.VnH(), SVE_MUL3), "ptrue p5.h, mul3");
+ COMPARE(ptrue(p6.VnS(), SVE_MUL4), "ptrue p6.s, mul4");
+ COMPARE(ptrue(p7.VnD(), SVE_ALL), "ptrue p7.d");
+
+ COMPARE(ptrues(p8.VnB(), SVE_ALL), "ptrues p8.b");
+ COMPARE(ptrues(p9.VnH(), SVE_MUL4), "ptrues p9.h, mul4");
+ COMPARE(ptrues(p10.VnS(), SVE_MUL3), "ptrues p10.s, mul3");
+ COMPARE(ptrues(p11.VnD(), SVE_VL256), "ptrues p11.d, vl256");
+ COMPARE(ptrues(p12.VnB(), SVE_VL16), "ptrues p12.b, vl16");
+ COMPARE(ptrues(p13.VnH(), SVE_VL8), "ptrues p13.h, vl8");
+ COMPARE(ptrues(p14.VnS(), SVE_VL1), "ptrues p14.s, vl1");
+ COMPARE(ptrues(p15.VnD(), SVE_POW2), "ptrues p15.d, pow2");
// The Assembler supports arbitrary immediates.
- COMPARE_PREFIX(ptrue(p7.VnS(), 0xd), "ptrue p7.s, vl256");
- COMPARE_PREFIX(ptrue(p8.VnD(), 0xe), "ptrue p8.d, #0xe");
- COMPARE_PREFIX(ptrue(p9.VnB(), 0x15), "ptrue p9.b, #0x15");
- COMPARE_PREFIX(ptrue(p10.VnH(), 0x19), "ptrue p10.h, #0x19");
- COMPARE_PREFIX(ptrue(p11.VnS(), 0x1a), "ptrue p11.s, #0x1a");
- COMPARE_PREFIX(ptrue(p12.VnD(), 0x1c), "ptrue p12.d, #0x1c");
- COMPARE_PREFIX(ptrue(p13.VnB(), 0x1d), "ptrue p13.b, mul4");
-
- COMPARE_PREFIX(ptrues(p14.VnS(), 0xd), "ptrues p14.s, vl256");
- COMPARE_PREFIX(ptrues(p15.VnD(), 0xe), "ptrues p15.d, #0xe");
- COMPARE_PREFIX(ptrues(p0.VnB(), 0x15), "ptrues p0.b, #0x15");
- COMPARE_PREFIX(ptrues(p1.VnH(), 0x19), "ptrues p1.h, #0x19");
- COMPARE_PREFIX(ptrues(p2.VnS(), 0x1a), "ptrues p2.s, #0x1a");
- COMPARE_PREFIX(ptrues(p3.VnD(), 0x1c), "ptrues p3.d, #0x1c");
- COMPARE_PREFIX(ptrues(p4.VnB(), 0x1d), "ptrues p4.b, mul4");
+ COMPARE(ptrue(p7.VnS(), 0xd), "ptrue p7.s, vl256");
+ COMPARE(ptrue(p8.VnD(), 0xe), "ptrue p8.d, #0xe");
+ COMPARE(ptrue(p9.VnB(), 0x15), "ptrue p9.b, #0x15");
+ COMPARE(ptrue(p10.VnH(), 0x19), "ptrue p10.h, #0x19");
+ COMPARE(ptrue(p11.VnS(), 0x1a), "ptrue p11.s, #0x1a");
+ COMPARE(ptrue(p12.VnD(), 0x1c), "ptrue p12.d, #0x1c");
+ COMPARE(ptrue(p13.VnB(), 0x1d), "ptrue p13.b, mul4");
+
+ COMPARE(ptrues(p14.VnS(), 0xd), "ptrues p14.s, vl256");
+ COMPARE(ptrues(p15.VnD(), 0xe), "ptrues p15.d, #0xe");
+ COMPARE(ptrues(p0.VnB(), 0x15), "ptrues p0.b, #0x15");
+ COMPARE(ptrues(p1.VnH(), 0x19), "ptrues p1.h, #0x19");
+ COMPARE(ptrues(p2.VnS(), 0x1a), "ptrues p2.s, #0x1a");
+ COMPARE(ptrues(p3.VnD(), 0x1c), "ptrues p3.d, #0x1c");
+ COMPARE(ptrues(p4.VnB(), 0x1d), "ptrues p4.b, mul4");
// SVE_ALL is the default.
- COMPARE_PREFIX(ptrue(p15.VnS()), "ptrue p15.s");
- COMPARE_PREFIX(ptrues(p0.VnS()), "ptrues p0.s");
+ COMPARE(ptrue(p15.VnS()), "ptrue p15.s");
+ COMPARE(ptrues(p0.VnS()), "ptrues p0.s");
// The MacroAssembler provides a `FlagsUpdate` argument.
COMPARE_MACRO(Ptrue(p0.VnB(), SVE_MUL3), "ptrue p0.b, mul3");
@@ -6166,8 +5947,8 @@ TEST(sve_predicate_initialize) {
TEST(sve_pfalse) {
SETUP();
- COMPARE_PREFIX(pfalse(p0.VnB()), "pfalse p0.b");
- COMPARE_PREFIX(pfalse(p15.VnB()), "pfalse p15.b");
+ COMPARE(pfalse(p0.VnB()), "pfalse p0.b");
+ COMPARE(pfalse(p15.VnB()), "pfalse p15.b");
COMPARE_MACRO(Pfalse(p1.VnB()), "pfalse p1.b");
COMPARE_MACRO(Pfalse(p4.VnH()), "pfalse p4.b");
@@ -6178,9 +5959,9 @@ TEST(sve_pfalse) {
TEST(sve_ptest) {
SETUP();
- COMPARE_PREFIX(ptest(p15, p0.VnB()), "ptest p15, p0.b");
- COMPARE_PREFIX(ptest(p0, p15.VnB()), "ptest p0, p15.b");
- COMPARE_PREFIX(ptest(p6, p6.VnB()), "ptest p6, p6.b");
+ COMPARE(ptest(p15, p0.VnB()), "ptest p15, p0.b");
+ COMPARE(ptest(p0, p15.VnB()), "ptest p0, p15.b");
+ COMPARE(ptest(p6, p6.VnB()), "ptest p6, p6.b");
COMPARE_MACRO(Ptest(p0, p1.VnB()), "ptest p0, p1.b");
}
@@ -6212,9 +5993,9 @@ TEST(sve_lane_size_relaxing) {
TEST(sve_read_ffr) {
SETUP();
- COMPARE_PREFIX(rdffr(p13.VnB()), "rdffr p13.b");
- COMPARE_PREFIX(rdffrs(p14.VnB(), p9.Zeroing()), "rdffrs p14.b, p9/z");
- COMPARE_PREFIX(rdffr(p5.VnB(), p14.Zeroing()), "rdffr p5.b, p14/z");
+ COMPARE(rdffr(p13.VnB()), "rdffr p13.b");
+ COMPARE(rdffrs(p14.VnB(), p9.Zeroing()), "rdffrs p14.b, p9/z");
+ COMPARE(rdffr(p5.VnB(), p14.Zeroing()), "rdffr p5.b, p14/z");
CLEANUP();
}
@@ -6222,14 +6003,14 @@ TEST(sve_read_ffr) {
TEST(sve_propagate_break) {
SETUP();
- COMPARE_PREFIX(brkpas(p12.VnB(), p0.Zeroing(), p12.VnB(), p11.VnB()),
- "brkpas p12.b, p0/z, p12.b, p11.b");
- COMPARE_PREFIX(brkpa(p1.VnB(), p2.Zeroing(), p13.VnB(), p8.VnB()),
- "brkpa p1.b, p2/z, p13.b, p8.b");
- COMPARE_PREFIX(brkpbs(p14.VnB(), p1.Zeroing(), p8.VnB(), p3.VnB()),
- "brkpbs p14.b, p1/z, p8.b, p3.b");
- COMPARE_PREFIX(brkpb(p2.VnB(), p5.Zeroing(), p0.VnB(), p14.VnB()),
- "brkpb p2.b, p5/z, p0.b, p14.b");
+ COMPARE(brkpas(p12.VnB(), p0.Zeroing(), p12.VnB(), p11.VnB()),
+ "brkpas p12.b, p0/z, p12.b, p11.b");
+ COMPARE(brkpa(p1.VnB(), p2.Zeroing(), p13.VnB(), p8.VnB()),
+ "brkpa p1.b, p2/z, p13.b, p8.b");
+ COMPARE(brkpbs(p14.VnB(), p1.Zeroing(), p8.VnB(), p3.VnB()),
+ "brkpbs p14.b, p1/z, p8.b, p3.b");
+ COMPARE(brkpb(p2.VnB(), p5.Zeroing(), p0.VnB(), p14.VnB()),
+ "brkpb p2.b, p5/z, p0.b, p14.b");
CLEANUP();
}
@@ -6237,22 +6018,22 @@ TEST(sve_propagate_break) {
TEST(sve_stack_allocation) {
SETUP();
- COMPARE_PREFIX(rdvl(x26, 0), "rdvl x26, #0");
- COMPARE_PREFIX(rdvl(x27, 31), "rdvl x27, #31");
- COMPARE_PREFIX(rdvl(x28, -32), "rdvl x28, #-32");
- COMPARE_PREFIX(rdvl(xzr, 9), "rdvl xzr, #9");
+ COMPARE(rdvl(x26, 0), "rdvl x26, #0");
+ COMPARE(rdvl(x27, 31), "rdvl x27, #31");
+ COMPARE(rdvl(x28, -32), "rdvl x28, #-32");
+ COMPARE(rdvl(xzr, 9), "rdvl xzr, #9");
- COMPARE_PREFIX(addvl(x6, x20, 0), "addvl x6, x20, #0");
- COMPARE_PREFIX(addvl(x7, x21, 31), "addvl x7, x21, #31");
- COMPARE_PREFIX(addvl(x8, x22, -32), "addvl x8, x22, #-32");
- COMPARE_PREFIX(addvl(sp, x1, 5), "addvl sp, x1, #5");
- COMPARE_PREFIX(addvl(x9, sp, -16), "addvl x9, sp, #-16");
+ COMPARE(addvl(x6, x20, 0), "addvl x6, x20, #0");
+ COMPARE(addvl(x7, x21, 31), "addvl x7, x21, #31");
+ COMPARE(addvl(x8, x22, -32), "addvl x8, x22, #-32");
+ COMPARE(addvl(sp, x1, 5), "addvl sp, x1, #5");
+ COMPARE(addvl(x9, sp, -16), "addvl x9, sp, #-16");
- COMPARE_PREFIX(addpl(x20, x6, 0), "addpl x20, x6, #0");
- COMPARE_PREFIX(addpl(x21, x7, 31), "addpl x21, x7, #31");
- COMPARE_PREFIX(addpl(x22, x8, -32), "addpl x22, x8, #-32");
- COMPARE_PREFIX(addpl(sp, x1, 5), "addpl sp, x1, #5");
- COMPARE_PREFIX(addpl(x9, sp, -16), "addpl x9, sp, #-16");
+ COMPARE(addpl(x20, x6, 0), "addpl x20, x6, #0");
+ COMPARE(addpl(x21, x7, 31), "addpl x21, x7, #31");
+ COMPARE(addpl(x22, x8, -32), "addpl x22, x8, #-32");
+ COMPARE(addpl(sp, x1, 5), "addpl sp, x1, #5");
+ COMPARE(addpl(x9, sp, -16), "addpl x9, sp, #-16");
CLEANUP();
}
@@ -6418,10 +6199,2547 @@ TEST(sve_write_ffr) {
SETUP();
COMPARE_PREFIX(setffr(), "setffr");
- COMPARE_PREFIX(wrffr(p9.VnB()), "wrffr p9.b");
+ COMPARE(wrffr(p9.VnB()), "wrffr p9.b");
+
+ CLEANUP();
+}
+
+TEST(sve2_match_nmatch) {
+ SETUP();
+
+ COMPARE(match(p15.VnB(), p1.Zeroing(), z18.VnB(), z5.VnB()),
+ "match p15.b, p1/z, z18.b, z5.b");
+ COMPARE(match(p15.VnH(), p1.Zeroing(), z18.VnH(), z5.VnH()),
+ "match p15.h, p1/z, z18.h, z5.h");
+ COMPARE(nmatch(p1.VnB(), p1.Zeroing(), z20.VnB(), z17.VnB()),
+ "nmatch p1.b, p1/z, z20.b, z17.b");
+ COMPARE(nmatch(p1.VnH(), p1.Zeroing(), z20.VnH(), z17.VnH()),
+ "nmatch p1.h, p1/z, z20.h, z17.h");
+
+ CLEANUP();
+}
+
+TEST(sve2_saba_uaba) {
+ SETUP();
+
+ COMPARE(saba(z13.VnB(), z2.VnB(), z31.VnB()), "saba z13.b, z2.b, z31.b");
+ COMPARE(saba(z13.VnD(), z2.VnD(), z31.VnD()), "saba z13.d, z2.d, z31.d");
+ COMPARE(saba(z13.VnH(), z2.VnH(), z31.VnH()), "saba z13.h, z2.h, z31.h");
+ COMPARE(saba(z13.VnS(), z2.VnS(), z31.VnS()), "saba z13.s, z2.s, z31.s");
+ COMPARE(uaba(z23.VnB(), z22.VnB(), z20.VnB()), "uaba z23.b, z22.b, z20.b");
+ COMPARE(uaba(z23.VnD(), z22.VnD(), z20.VnD()), "uaba z23.d, z22.d, z20.d");
+ COMPARE(uaba(z23.VnH(), z22.VnH(), z20.VnH()), "uaba z23.h, z22.h, z20.h");
+ COMPARE(uaba(z23.VnS(), z22.VnS(), z20.VnS()), "uaba z23.s, z22.s, z20.s");
+
+ COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()),
+ "saba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()),
+ "saba z12.b, z3.b, z12.b");
+ COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z12.VnB(), z30.VnB()),
+ "saba z12.b, z12.b, z30.b");
+ COMPARE_MACRO(Saba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), "");
+ COMPARE_MACRO(Saba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()),
+ "movprfx z12, z13\n"
+ "saba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()),
+ "movprfx z12, z3\n"
+ "saba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Saba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()),
+ "movprfx z12, z30\n"
+ "saba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()),
+ "mov z12.d, z3.d");
+ COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()),
+ "mov z31.d, z12.d\n"
+ "movprfx z12, z3\n"
+ "saba z12.b, z31.b, z3.b");
+ COMPARE_MACRO(Saba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()),
+ "mov z31.d, z12.d\n"
+ "movprfx z12, z3\n"
+ "saba z12.b, z3.b, z31.b");
+
+ COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z30.VnB()),
+ "uaba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z3.VnB(), z12.VnB()),
+ "uaba z12.b, z3.b, z12.b");
+ COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z30.VnB()),
+ "uaba z12.b, z12.b, z30.b");
+ COMPARE_MACRO(Uaba(z12.VnB(), z12.VnB(), z12.VnB(), z12.VnB()), "");
+ COMPARE_MACRO(Uaba(z12.VnB(), z13.VnB(), z3.VnB(), z30.VnB()),
+ "movprfx z12, z13\n"
+ "uaba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z30.VnB()),
+ "movprfx z12, z3\n"
+ "uaba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Uaba(z12.VnB(), z30.VnB(), z3.VnB(), z30.VnB()),
+ "movprfx z12, z30\n"
+ "uaba z12.b, z3.b, z30.b");
+ COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z3.VnB()),
+ "mov z12.d, z3.d");
+ COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z12.VnB(), z3.VnB()),
+ "mov z31.d, z12.d\n"
+ "movprfx z12, z3\n"
+ "uaba z12.b, z31.b, z3.b");
+ COMPARE_MACRO(Uaba(z12.VnB(), z3.VnB(), z3.VnB(), z12.VnB()),
+ "mov z31.d, z12.d\n"
+ "movprfx z12, z3\n"
+ "uaba z12.b, z3.b, z31.b");
+
+ CLEANUP();
+}
+
+TEST(sve2_halving_arithmetic) {
+ SETUP();
+
+ COMPARE(shadd(z20.VnB(), p3.Merging(), z20.VnB(), z7.VnB()),
+ "shadd z20.b, p3/m, z20.b, z7.b");
+ COMPARE(shadd(z20.VnD(), p3.Merging(), z20.VnD(), z7.VnD()),
+ "shadd z20.d, p3/m, z20.d, z7.d");
+ COMPARE(shadd(z20.VnH(), p3.Merging(), z20.VnH(), z7.VnH()),
+ "shadd z20.h, p3/m, z20.h, z7.h");
+ COMPARE(shadd(z20.VnS(), p3.Merging(), z20.VnS(), z7.VnS()),
+ "shadd z20.s, p3/m, z20.s, z7.s");
+ COMPARE(shsub(z21.VnB(), p0.Merging(), z21.VnB(), z0.VnB()),
+ "shsub z21.b, p0/m, z21.b, z0.b");
+ COMPARE(shsub(z21.VnD(), p0.Merging(), z21.VnD(), z0.VnD()),
+ "shsub z21.d, p0/m, z21.d, z0.d");
+ COMPARE(shsub(z21.VnH(), p0.Merging(), z21.VnH(), z0.VnH()),
+ "shsub z21.h, p0/m, z21.h, z0.h");
+ COMPARE(shsub(z21.VnS(), p0.Merging(), z21.VnS(), z0.VnS()),
+ "shsub z21.s, p0/m, z21.s, z0.s");
+ COMPARE(shsubr(z1.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
+ "shsubr z1.b, p0/m, z1.b, z2.b");
+ COMPARE(shsubr(z1.VnD(), p0.Merging(), z1.VnD(), z2.VnD()),
+ "shsubr z1.d, p0/m, z1.d, z2.d");
+ COMPARE(shsubr(z1.VnH(), p0.Merging(), z1.VnH(), z2.VnH()),
+ "shsubr z1.h, p0/m, z1.h, z2.h");
+ COMPARE(shsubr(z1.VnS(), p0.Merging(), z1.VnS(), z2.VnS()),
+ "shsubr z1.s, p0/m, z1.s, z2.s");
+ COMPARE(srhadd(z23.VnB(), p4.Merging(), z23.VnB(), z14.VnB()),
+ "srhadd z23.b, p4/m, z23.b, z14.b");
+ COMPARE(srhadd(z23.VnD(), p4.Merging(), z23.VnD(), z14.VnD()),
+ "srhadd z23.d, p4/m, z23.d, z14.d");
+ COMPARE(srhadd(z23.VnH(), p4.Merging(), z23.VnH(), z14.VnH()),
+ "srhadd z23.h, p4/m, z23.h, z14.h");
+ COMPARE(srhadd(z23.VnS(), p4.Merging(), z23.VnS(), z14.VnS()),
+ "srhadd z23.s, p4/m, z23.s, z14.s");
+
+ COMPARE(uhadd(z21.VnB(), p2.Merging(), z21.VnB(), z19.VnB()),
+ "uhadd z21.b, p2/m, z21.b, z19.b");
+ COMPARE(uhadd(z21.VnD(), p2.Merging(), z21.VnD(), z19.VnD()),
+ "uhadd z21.d, p2/m, z21.d, z19.d");
+ COMPARE(uhadd(z21.VnH(), p2.Merging(), z21.VnH(), z19.VnH()),
+ "uhadd z21.h, p2/m, z21.h, z19.h");
+ COMPARE(uhadd(z21.VnS(), p2.Merging(), z21.VnS(), z19.VnS()),
+ "uhadd z21.s, p2/m, z21.s, z19.s");
+ COMPARE(uhsub(z1.VnB(), p4.Merging(), z1.VnB(), z9.VnB()),
+ "uhsub z1.b, p4/m, z1.b, z9.b");
+ COMPARE(uhsub(z1.VnD(), p4.Merging(), z1.VnD(), z9.VnD()),
+ "uhsub z1.d, p4/m, z1.d, z9.d");
+ COMPARE(uhsub(z1.VnH(), p4.Merging(), z1.VnH(), z9.VnH()),
+ "uhsub z1.h, p4/m, z1.h, z9.h");
+ COMPARE(uhsub(z1.VnS(), p4.Merging(), z1.VnS(), z9.VnS()),
+ "uhsub z1.s, p4/m, z1.s, z9.s");
+ COMPARE(uhsubr(z18.VnB(), p0.Merging(), z18.VnB(), z1.VnB()),
+ "uhsubr z18.b, p0/m, z18.b, z1.b");
+ COMPARE(uhsubr(z18.VnD(), p0.Merging(), z18.VnD(), z1.VnD()),
+ "uhsubr z18.d, p0/m, z18.d, z1.d");
+ COMPARE(uhsubr(z18.VnH(), p0.Merging(), z18.VnH(), z1.VnH()),
+ "uhsubr z18.h, p0/m, z18.h, z1.h");
+ COMPARE(uhsubr(z18.VnS(), p0.Merging(), z18.VnS(), z1.VnS()),
+ "uhsubr z18.s, p0/m, z18.s, z1.s");
+ COMPARE(urhadd(z29.VnB(), p4.Merging(), z29.VnB(), z10.VnB()),
+ "urhadd z29.b, p4/m, z29.b, z10.b");
+ COMPARE(urhadd(z29.VnD(), p4.Merging(), z29.VnD(), z10.VnD()),
+ "urhadd z29.d, p4/m, z29.d, z10.d");
+ COMPARE(urhadd(z29.VnH(), p4.Merging(), z29.VnH(), z10.VnH()),
+ "urhadd z29.h, p4/m, z29.h, z10.h");
+ COMPARE(urhadd(z29.VnS(), p4.Merging(), z29.VnS(), z10.VnS()),
+ "urhadd z29.s, p4/m, z29.s, z10.s");
+
+ COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
+ "movprfx z0.b, p0/m, z1.b\n"
+ "shadd z0.b, p0/m, z0.b, z2.b");
+ COMPARE_MACRO(Shadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
+ "shadd z0.b, p0/m, z0.b, z1.b");
+ COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
+ "movprfx z0.b, p0/m, z1.b\n"
+ "srhadd z0.b, p0/m, z0.b, z2.b");
+ COMPARE_MACRO(Srhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
+ "srhadd z0.b, p0/m, z0.b, z1.b");
+ COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
+ "movprfx z0.b, p0/m, z1.b\n"
+ "uhadd z0.b, p0/m, z0.b, z2.b");
+ COMPARE_MACRO(Uhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
+ "uhadd z0.b, p0/m, z0.b, z1.b");
+ COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z2.VnB()),
+ "movprfx z0.b, p0/m, z1.b\n"
+ "urhadd z0.b, p0/m, z0.b, z2.b");
+ COMPARE_MACRO(Urhadd(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
+ "urhadd z0.b, p0/m, z0.b, z1.b");
+
+ COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()),
+ "shsub z0.b, p0/m, z0.b, z1.b");
+ COMPARE_MACRO(Shsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
+ "shsubr z0.b, p0/m, z0.b, z1.b");
+ COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z0.VnB(), z1.VnB()),
+ "uhsub z0.b, p0/m, z0.b, z1.b");
+ COMPARE_MACRO(Uhsub(z0.VnB(), p0.Merging(), z1.VnB(), z0.VnB()),
+ "uhsubr z0.b, p0/m, z0.b, z1.b");
+
+ CLEANUP();
+}
+
+TEST(sve2_sra) {
+ SETUP();
+
+ COMPARE(ssra(z0.VnB(), z8.VnB(), 1), "ssra z0.b, z8.b, #1");
+ COMPARE(ssra(z0.VnB(), z8.VnB(), 2), "ssra z0.b, z8.b, #2");
+ COMPARE(ssra(z0.VnB(), z8.VnB(), 5), "ssra z0.b, z8.b, #5");
+ COMPARE(ssra(z0.VnB(), z8.VnB(), 8), "ssra z0.b, z8.b, #8");
+ COMPARE(ssra(z0.VnH(), z8.VnH(), 1), "ssra z0.h, z8.h, #1");
+ COMPARE(ssra(z0.VnH(), z8.VnH(), 16), "ssra z0.h, z8.h, #16");
+ COMPARE(ssra(z0.VnS(), z8.VnS(), 1), "ssra z0.s, z8.s, #1");
+ COMPARE(ssra(z0.VnS(), z8.VnS(), 31), "ssra z0.s, z8.s, #31");
+ COMPARE(ssra(z0.VnD(), z8.VnD(), 1), "ssra z0.d, z8.d, #1");
+ COMPARE(ssra(z0.VnD(), z8.VnD(), 64), "ssra z0.d, z8.d, #64");
+
+ COMPARE(srsra(z0.VnB(), z8.VnB(), 1), "srsra z0.b, z8.b, #1");
+ COMPARE(srsra(z0.VnB(), z8.VnB(), 2), "srsra z0.b, z8.b, #2");
+ COMPARE(srsra(z0.VnB(), z8.VnB(), 5), "srsra z0.b, z8.b, #5");
+ COMPARE(srsra(z0.VnB(), z8.VnB(), 8), "srsra z0.b, z8.b, #8");
+ COMPARE(srsra(z0.VnH(), z8.VnH(), 1), "srsra z0.h, z8.h, #1");
+ COMPARE(srsra(z0.VnH(), z8.VnH(), 16), "srsra z0.h, z8.h, #16");
+ COMPARE(srsra(z0.VnS(), z8.VnS(), 1), "srsra z0.s, z8.s, #1");
+ COMPARE(srsra(z0.VnS(), z8.VnS(), 31), "srsra z0.s, z8.s, #31");
+ COMPARE(srsra(z0.VnD(), z8.VnD(), 1), "srsra z0.d, z8.d, #1");
+ COMPARE(srsra(z0.VnD(), z8.VnD(), 64), "srsra z0.d, z8.d, #64");
+
+ COMPARE(usra(z0.VnB(), z8.VnB(), 1), "usra z0.b, z8.b, #1");
+ COMPARE(usra(z0.VnB(), z8.VnB(), 2), "usra z0.b, z8.b, #2");
+ COMPARE(usra(z0.VnB(), z8.VnB(), 5), "usra z0.b, z8.b, #5");
+ COMPARE(usra(z0.VnB(), z8.VnB(), 8), "usra z0.b, z8.b, #8");
+ COMPARE(usra(z0.VnH(), z8.VnH(), 1), "usra z0.h, z8.h, #1");
+ COMPARE(usra(z0.VnH(), z8.VnH(), 16), "usra z0.h, z8.h, #16");
+ COMPARE(usra(z0.VnS(), z8.VnS(), 1), "usra z0.s, z8.s, #1");
+ COMPARE(usra(z0.VnS(), z8.VnS(), 31), "usra z0.s, z8.s, #31");
+ COMPARE(usra(z0.VnD(), z8.VnD(), 1), "usra z0.d, z8.d, #1");
+ COMPARE(usra(z0.VnD(), z8.VnD(), 64), "usra z0.d, z8.d, #64");
+
+ COMPARE(ursra(z0.VnB(), z8.VnB(), 1), "ursra z0.b, z8.b, #1");
+ COMPARE(ursra(z0.VnB(), z8.VnB(), 2), "ursra z0.b, z8.b, #2");
+ COMPARE(ursra(z0.VnB(), z8.VnB(), 5), "ursra z0.b, z8.b, #5");
+ COMPARE(ursra(z0.VnB(), z8.VnB(), 8), "ursra z0.b, z8.b, #8");
+ COMPARE(ursra(z0.VnH(), z8.VnH(), 1), "ursra z0.h, z8.h, #1");
+ COMPARE(ursra(z0.VnH(), z8.VnH(), 16), "ursra z0.h, z8.h, #16");
+ COMPARE(ursra(z0.VnS(), z8.VnS(), 1), "ursra z0.s, z8.s, #1");
+ COMPARE(ursra(z0.VnS(), z8.VnS(), 31), "ursra z0.s, z8.s, #31");
+ COMPARE(ursra(z0.VnD(), z8.VnD(), 1), "ursra z0.d, z8.d, #1");
+ COMPARE(ursra(z0.VnD(), z8.VnD(), 64), "ursra z0.d, z8.d, #64");
+
+ COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z2.VnB(), 2),
+ "movprfx z0, z1\n"
+ "ssra z0.b, z2.b, #2");
+ COMPARE_MACRO(Ssra(z0.VnB(), z0.VnB(), z2.VnB(), 2), "ssra z0.b, z2.b, #2");
+ COMPARE_MACRO(Ssra(z0.VnB(), z1.VnB(), z1.VnB(), 2),
+ "movprfx z0, z1\n"
+ "ssra z0.b, z1.b, #2");
+ COMPARE_MACRO(Ssra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
+ "mov z31.d, z2.d\n"
+ "movprfx z2, z1\n"
+ "ssra z2.b, z31.b, #2");
+ COMPARE_MACRO(Ssra(z0.VnB(), z0.VnB(), z0.VnB(), 2), "ssra z0.b, z0.b, #2");
+
+ COMPARE_MACRO(Srsra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
+ "mov z31.d, z2.d\n"
+ "movprfx z2, z1\n"
+ "srsra z2.b, z31.b, #2");
+ COMPARE_MACRO(Usra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
+ "mov z31.d, z2.d\n"
+ "movprfx z2, z1\n"
+ "usra z2.b, z31.b, #2");
+ COMPARE_MACRO(Ursra(z2.VnB(), z1.VnB(), z2.VnB(), 2),
+ "mov z31.d, z2.d\n"
+ "movprfx z2, z1\n"
+ "ursra z2.b, z31.b, #2");
+ CLEANUP();
+}
+
+TEST(sve2_sri_sli) {
+ SETUP();
+
+ COMPARE(sri(z6.VnB(), z9.VnB(), 1), "sri z6.b, z9.b, #1");
+ COMPARE(sri(z6.VnB(), z9.VnB(), 2), "sri z6.b, z9.b, #2");
+ COMPARE(sri(z6.VnB(), z9.VnB(), 5), "sri z6.b, z9.b, #5");
+ COMPARE(sri(z6.VnB(), z9.VnB(), 8), "sri z6.b, z9.b, #8");
+ COMPARE(sri(z6.VnH(), z9.VnH(), 1), "sri z6.h, z9.h, #1");
+ COMPARE(sri(z6.VnH(), z9.VnH(), 16), "sri z6.h, z9.h, #16");
+ COMPARE(sri(z6.VnS(), z9.VnS(), 1), "sri z6.s, z9.s, #1");
+ COMPARE(sri(z6.VnS(), z9.VnS(), 31), "sri z6.s, z9.s, #31");
+ COMPARE(sri(z6.VnD(), z9.VnD(), 1), "sri z6.d, z9.d, #1");
+ COMPARE(sri(z6.VnD(), z9.VnD(), 64), "sri z6.d, z9.d, #64");
+
+ COMPARE(sli(z29.VnB(), z7.VnB(), 0), "sli z29.b, z7.b, #0");
+ COMPARE(sli(z29.VnB(), z7.VnB(), 2), "sli z29.b, z7.b, #2");
+ COMPARE(sli(z29.VnB(), z7.VnB(), 5), "sli z29.b, z7.b, #5");
+ COMPARE(sli(z29.VnB(), z7.VnB(), 7), "sli z29.b, z7.b, #7");
+ COMPARE(sli(z29.VnH(), z7.VnH(), 0), "sli z29.h, z7.h, #0");
+ COMPARE(sli(z29.VnH(), z7.VnH(), 15), "sli z29.h, z7.h, #15");
+ COMPARE(sli(z29.VnS(), z7.VnS(), 0), "sli z29.s, z7.s, #0");
+ COMPARE(sli(z29.VnS(), z7.VnS(), 31), "sli z29.s, z7.s, #31");
+ COMPARE(sli(z29.VnD(), z7.VnD(), 0), "sli z29.d, z7.d, #0");
+ COMPARE(sli(z29.VnD(), z7.VnD(), 63), "sli z29.d, z7.d, #63");
+
+ CLEANUP();
+}
+
+TEST(sve2_shift_imm) {
+ SETUP();
+
+ COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 0),
+ "sqshl z0.b, p5/m, z0.b, #0");
+ COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 2),
+ "sqshl z0.b, p5/m, z0.b, #2");
+ COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 5),
+ "sqshl z0.b, p5/m, z0.b, #5");
+ COMPARE(sqshl(z0.VnB(), p5.Merging(), z0.VnB(), 7),
+ "sqshl z0.b, p5/m, z0.b, #7");
+ COMPARE(sqshl(z0.VnH(), p5.Merging(), z0.VnH(), 0),
+ "sqshl z0.h, p5/m, z0.h, #0");
+ COMPARE(sqshl(z0.VnH(), p5.Merging(), z0.VnH(), 15),
+ "sqshl z0.h, p5/m, z0.h, #15");
+ COMPARE(sqshl(z0.VnS(), p5.Merging(), z0.VnS(), 0),
+ "sqshl z0.s, p5/m, z0.s, #0");
+ COMPARE(sqshl(z0.VnS(), p5.Merging(), z0.VnS(), 31),
+ "sqshl z0.s, p5/m, z0.s, #31");
+ COMPARE(sqshl(z0.VnD(), p5.Merging(), z0.VnD(), 0),
+ "sqshl z0.d, p5/m, z0.d, #0");
+ COMPARE(sqshl(z0.VnD(), p5.Merging(), z0.VnD(), 63),
+ "sqshl z0.d, p5/m, z0.d, #63");
+
+ COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 0),
+ "sqshlu z10.b, p1/m, z10.b, #0");
+ COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 2),
+ "sqshlu z10.b, p1/m, z10.b, #2");
+ COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 5),
+ "sqshlu z10.b, p1/m, z10.b, #5");
+ COMPARE(sqshlu(z10.VnB(), p1.Merging(), z10.VnB(), 7),
+ "sqshlu z10.b, p1/m, z10.b, #7");
+ COMPARE(sqshlu(z10.VnH(), p1.Merging(), z10.VnH(), 0),
+ "sqshlu z10.h, p1/m, z10.h, #0");
+ COMPARE(sqshlu(z10.VnH(), p1.Merging(), z10.VnH(), 15),
+ "sqshlu z10.h, p1/m, z10.h, #15");
+ COMPARE(sqshlu(z10.VnS(), p1.Merging(), z10.VnS(), 0),
+ "sqshlu z10.s, p1/m, z10.s, #0");
+ COMPARE(sqshlu(z10.VnS(), p1.Merging(), z10.VnS(), 31),
+ "sqshlu z10.s, p1/m, z10.s, #31");
+ COMPARE(sqshlu(z10.VnD(), p1.Merging(), z10.VnD(), 0),
+ "sqshlu z10.d, p1/m, z10.d, #0");
+ COMPARE(sqshlu(z10.VnD(), p1.Merging(), z10.VnD(), 63),
+ "sqshlu z10.d, p1/m, z10.d, #63");
+
+ COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 1),
+ "srshr z12.b, p0/m, z12.b, #1");
+ COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 2),
+ "srshr z12.b, p0/m, z12.b, #2");
+ COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 5),
+ "srshr z12.b, p0/m, z12.b, #5");
+ COMPARE(srshr(z12.VnB(), p0.Merging(), z12.VnB(), 8),
+ "srshr z12.b, p0/m, z12.b, #8");
+ COMPARE(srshr(z12.VnH(), p0.Merging(), z12.VnH(), 1),
+ "srshr z12.h, p0/m, z12.h, #1");
+ COMPARE(srshr(z12.VnH(), p0.Merging(), z12.VnH(), 16),
+ "srshr z12.h, p0/m, z12.h, #16");
+ COMPARE(srshr(z12.VnS(), p0.Merging(), z12.VnS(), 1),
+ "srshr z12.s, p0/m, z12.s, #1");
+ COMPARE(srshr(z12.VnS(), p0.Merging(), z12.VnS(), 32),
+ "srshr z12.s, p0/m, z12.s, #32");
+ COMPARE(srshr(z12.VnD(), p0.Merging(), z12.VnD(), 1),
+ "srshr z12.d, p0/m, z12.d, #1");
+ COMPARE(srshr(z12.VnD(), p0.Merging(), z12.VnD(), 64),
+ "srshr z12.d, p0/m, z12.d, #64");
+
+ COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 0),
+ "uqshl z29.b, p7/m, z29.b, #0");
+ COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 2),
+ "uqshl z29.b, p7/m, z29.b, #2");
+ COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 5),
+ "uqshl z29.b, p7/m, z29.b, #5");
+ COMPARE(uqshl(z29.VnB(), p7.Merging(), z29.VnB(), 7),
+ "uqshl z29.b, p7/m, z29.b, #7");
+ COMPARE(uqshl(z29.VnH(), p7.Merging(), z29.VnH(), 0),
+ "uqshl z29.h, p7/m, z29.h, #0");
+ COMPARE(uqshl(z29.VnH(), p7.Merging(), z29.VnH(), 15),
+ "uqshl z29.h, p7/m, z29.h, #15");
+ COMPARE(uqshl(z29.VnS(), p7.Merging(), z29.VnS(), 0),
+ "uqshl z29.s, p7/m, z29.s, #0");
+ COMPARE(uqshl(z29.VnS(), p7.Merging(), z29.VnS(), 31),
+ "uqshl z29.s, p7/m, z29.s, #31");
+ COMPARE(uqshl(z29.VnD(), p7.Merging(), z29.VnD(), 0),
+ "uqshl z29.d, p7/m, z29.d, #0");
+ COMPARE(uqshl(z29.VnD(), p7.Merging(), z29.VnD(), 63),
+ "uqshl z29.d, p7/m, z29.d, #63");
+
+ COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 1),
+ "urshr z31.b, p2/m, z31.b, #1");
+ COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 2),
+ "urshr z31.b, p2/m, z31.b, #2");
+ COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 5),
+ "urshr z31.b, p2/m, z31.b, #5");
+ COMPARE(urshr(z31.VnB(), p2.Merging(), z31.VnB(), 8),
+ "urshr z31.b, p2/m, z31.b, #8");
+ COMPARE(urshr(z31.VnH(), p2.Merging(), z31.VnH(), 1),
+ "urshr z31.h, p2/m, z31.h, #1");
+ COMPARE(urshr(z31.VnH(), p2.Merging(), z31.VnH(), 16),
+ "urshr z31.h, p2/m, z31.h, #16");
+ COMPARE(urshr(z31.VnS(), p2.Merging(), z31.VnS(), 1),
+ "urshr z31.s, p2/m, z31.s, #1");
+ COMPARE(urshr(z31.VnS(), p2.Merging(), z31.VnS(), 32),
+ "urshr z31.s, p2/m, z31.s, #32");
+ COMPARE(urshr(z31.VnD(), p2.Merging(), z31.VnD(), 1),
+ "urshr z31.d, p2/m, z31.d, #1");
+ COMPARE(urshr(z31.VnD(), p2.Merging(), z31.VnD(), 64),
+ "urshr z31.d, p2/m, z31.d, #64");
+
+ CLEANUP();
+}
+
+TEST(sve2_shift_sat) {
+ SETUP();
+
+ COMPARE(srshl(z31.VnB(), p7.Merging(), z31.VnB(), z3.VnB()),
+ "srshl z31.b, p7/m, z31.b, z3.b");
+ COMPARE(srshl(z31.VnD(), p7.Merging(), z31.VnD(), z3.VnD()),
+ "srshl z31.d, p7/m, z31.d, z3.d");
+ COMPARE(srshl(z31.VnH(), p7.Merging(), z31.VnH(), z3.VnH()),
+ "srshl z31.h, p7/m, z31.h, z3.h");
+ COMPARE(srshl(z31.VnS(), p7.Merging(), z31.VnS(), z3.VnS()),
+ "srshl z31.s, p7/m, z31.s, z3.s");
+
+ COMPARE(srshlr(z16.VnB(), p7.Merging(), z16.VnB(), z29.VnB()),
+ "srshlr z16.b, p7/m, z16.b, z29.b");
+ COMPARE(srshlr(z16.VnD(), p7.Merging(), z16.VnD(), z29.VnD()),
+ "srshlr z16.d, p7/m, z16.d, z29.d");
+ COMPARE(srshlr(z16.VnH(), p7.Merging(), z16.VnH(), z29.VnH()),
+ "srshlr z16.h, p7/m, z16.h, z29.h");
+ COMPARE(srshlr(z16.VnS(), p7.Merging(), z16.VnS(), z29.VnS()),
+ "srshlr z16.s, p7/m, z16.s, z29.s");
+
+ COMPARE(urshl(z15.VnB(), p2.Merging(), z15.VnB(), z3.VnB()),
+ "urshl z15.b, p2/m, z15.b, z3.b");
+ COMPARE(urshl(z15.VnD(), p2.Merging(), z15.VnD(), z3.VnD()),
+ "urshl z15.d, p2/m, z15.d, z3.d");
+ COMPARE(urshl(z15.VnH(), p2.Merging(), z15.VnH(), z3.VnH()),
+ "urshl z15.h, p2/m, z15.h, z3.h");
+ COMPARE(urshl(z15.VnS(), p2.Merging(), z15.VnS(), z3.VnS()),
+ "urshl z15.s, p2/m, z15.s, z3.s");
+
+ COMPARE(urshlr(z27.VnB(), p1.Merging(), z27.VnB(), z30.VnB()),
+ "urshlr z27.b, p1/m, z27.b, z30.b");
+ COMPARE(urshlr(z27.VnD(), p1.Merging(), z27.VnD(), z30.VnD()),
+ "urshlr z27.d, p1/m, z27.d, z30.d");
+ COMPARE(urshlr(z27.VnH(), p1.Merging(), z27.VnH(), z30.VnH()),
+ "urshlr z27.h, p1/m, z27.h, z30.h");
+ COMPARE(urshlr(z27.VnS(), p1.Merging(), z27.VnS(), z30.VnS()),
+ "urshlr z27.s, p1/m, z27.s, z30.s");
+
+ COMPARE(sqshl(z22.VnB(), p4.Merging(), z22.VnB(), z21.VnB()),
+ "sqshl z22.b, p4/m, z22.b, z21.b");
+ COMPARE(sqshl(z22.VnD(), p4.Merging(), z22.VnD(), z21.VnD()),
+ "sqshl z22.d, p4/m, z22.d, z21.d");
+ COMPARE(sqshl(z22.VnH(), p4.Merging(), z22.VnH(), z21.VnH()),
+ "sqshl z22.h, p4/m, z22.h, z21.h");
+ COMPARE(sqshl(z22.VnS(), p4.Merging(), z22.VnS(), z21.VnS()),
+ "sqshl z22.s, p4/m, z22.s, z21.s");
+
+ COMPARE(sqshlr(z7.VnB(), p3.Merging(), z7.VnB(), z5.VnB()),
+ "sqshlr z7.b, p3/m, z7.b, z5.b");
+ COMPARE(sqshlr(z7.VnD(), p3.Merging(), z7.VnD(), z5.VnD()),
+ "sqshlr z7.d, p3/m, z7.d, z5.d");
+ COMPARE(sqshlr(z7.VnH(), p3.Merging(), z7.VnH(), z5.VnH()),
+ "sqshlr z7.h, p3/m, z7.h, z5.h");
+ COMPARE(sqshlr(z7.VnS(), p3.Merging(), z7.VnS(), z5.VnS()),
+ "sqshlr z7.s, p3/m, z7.s, z5.s");
+
+ COMPARE(uqshl(z10.VnB(), p0.Merging(), z10.VnB(), z21.VnB()),
+ "uqshl z10.b, p0/m, z10.b, z21.b");
+ COMPARE(uqshl(z10.VnD(), p0.Merging(), z10.VnD(), z21.VnD()),
+ "uqshl z10.d, p0/m, z10.d, z21.d");
+ COMPARE(uqshl(z10.VnH(), p0.Merging(), z10.VnH(), z21.VnH()),
+ "uqshl z10.h, p0/m, z10.h, z21.h");
+ COMPARE(uqshl(z10.VnS(), p0.Merging(), z10.VnS(), z21.VnS()),
+ "uqshl z10.s, p0/m, z10.s, z21.s");
+
+ COMPARE(uqshlr(z12.VnB(), p1.Merging(), z12.VnB(), z12.VnB()),
+ "uqshlr z12.b, p1/m, z12.b, z12.b");
+ COMPARE(uqshlr(z12.VnD(), p1.Merging(), z12.VnD(), z12.VnD()),
+ "uqshlr z12.d, p1/m, z12.d, z12.d");
+ COMPARE(uqshlr(z12.VnH(), p1.Merging(), z12.VnH(), z12.VnH()),
+ "uqshlr z12.h, p1/m, z12.h, z12.h");
+ COMPARE(uqshlr(z12.VnS(), p1.Merging(), z12.VnS(), z12.VnS()),
+ "uqshlr z12.s, p1/m, z12.s, z12.s");
+
+ COMPARE(sqrshl(z31.VnB(), p5.Merging(), z31.VnB(), z27.VnB()),
+ "sqrshl z31.b, p5/m, z31.b, z27.b");
+ COMPARE(sqrshl(z31.VnD(), p5.Merging(), z31.VnD(), z27.VnD()),
+ "sqrshl z31.d, p5/m, z31.d, z27.d");
+ COMPARE(sqrshl(z31.VnH(), p5.Merging(), z31.VnH(), z27.VnH()),
+ "sqrshl z31.h, p5/m, z31.h, z27.h");
+ COMPARE(sqrshl(z31.VnS(), p5.Merging(), z31.VnS(), z27.VnS()),
+ "sqrshl z31.s, p5/m, z31.s, z27.s");
+
+ COMPARE(sqrshlr(z25.VnB(), p6.Merging(), z25.VnB(), z7.VnB()),
+ "sqrshlr z25.b, p6/m, z25.b, z7.b");
+ COMPARE(sqrshlr(z25.VnD(), p6.Merging(), z25.VnD(), z7.VnD()),
+ "sqrshlr z25.d, p6/m, z25.d, z7.d");
+ COMPARE(sqrshlr(z25.VnH(), p6.Merging(), z25.VnH(), z7.VnH()),
+ "sqrshlr z25.h, p6/m, z25.h, z7.h");
+ COMPARE(sqrshlr(z25.VnS(), p6.Merging(), z25.VnS(), z7.VnS()),
+ "sqrshlr z25.s, p6/m, z25.s, z7.s");
+
+ COMPARE(uqrshl(z20.VnB(), p1.Merging(), z20.VnB(), z30.VnB()),
+ "uqrshl z20.b, p1/m, z20.b, z30.b");
+ COMPARE(uqrshl(z20.VnD(), p1.Merging(), z20.VnD(), z30.VnD()),
+ "uqrshl z20.d, p1/m, z20.d, z30.d");
+ COMPARE(uqrshl(z20.VnH(), p1.Merging(), z20.VnH(), z30.VnH()),
+ "uqrshl z20.h, p1/m, z20.h, z30.h");
+ COMPARE(uqrshl(z20.VnS(), p1.Merging(), z20.VnS(), z30.VnS()),
+ "uqrshl z20.s, p1/m, z20.s, z30.s");
+
+ COMPARE(uqrshlr(z8.VnB(), p5.Merging(), z8.VnB(), z9.VnB()),
+ "uqrshlr z8.b, p5/m, z8.b, z9.b");
+ COMPARE(uqrshlr(z8.VnD(), p5.Merging(), z8.VnD(), z9.VnD()),
+ "uqrshlr z8.d, p5/m, z8.d, z9.d");
+ COMPARE(uqrshlr(z8.VnH(), p5.Merging(), z8.VnH(), z9.VnH()),
+ "uqrshlr z8.h, p5/m, z8.h, z9.h");
+ COMPARE(uqrshlr(z8.VnS(), p5.Merging(), z8.VnS(), z9.VnS()),
+ "uqrshlr z8.s, p5/m, z8.s, z9.s");
+
+ CLEANUP();
+}
+
+TEST(sve2_sat_arith) {
+ SETUP();
+
+ COMPARE(sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z3.VnB()),
+ "sqadd z28.b, p0/m, z28.b, z3.b");
+ COMPARE(sqadd(z28.VnD(), p0.Merging(), z28.VnD(), z3.VnD()),
+ "sqadd z28.d, p0/m, z28.d, z3.d");
+ COMPARE(sqadd(z28.VnH(), p0.Merging(), z28.VnH(), z3.VnH()),
+ "sqadd z28.h, p0/m, z28.h, z3.h");
+ COMPARE(sqadd(z28.VnS(), p0.Merging(), z28.VnS(), z3.VnS()),
+ "sqadd z28.s, p0/m, z28.s, z3.s");
+ COMPARE(sqsub(z6.VnB(), p0.Merging(), z6.VnB(), z12.VnB()),
+ "sqsub z6.b, p0/m, z6.b, z12.b");
+ COMPARE(sqsub(z6.VnD(), p0.Merging(), z6.VnD(), z12.VnD()),
+ "sqsub z6.d, p0/m, z6.d, z12.d");
+ COMPARE(sqsub(z6.VnH(), p0.Merging(), z6.VnH(), z12.VnH()),
+ "sqsub z6.h, p0/m, z6.h, z12.h");
+ COMPARE(sqsub(z6.VnS(), p0.Merging(), z6.VnS(), z12.VnS()),
+ "sqsub z6.s, p0/m, z6.s, z12.s");
+ COMPARE(sqsubr(z16.VnB(), p7.Merging(), z16.VnB(), z22.VnB()),
+ "sqsubr z16.b, p7/m, z16.b, z22.b");
+ COMPARE(sqsubr(z16.VnD(), p7.Merging(), z16.VnD(), z22.VnD()),
+ "sqsubr z16.d, p7/m, z16.d, z22.d");
+ COMPARE(sqsubr(z16.VnH(), p7.Merging(), z16.VnH(), z22.VnH()),
+ "sqsubr z16.h, p7/m, z16.h, z22.h");
+ COMPARE(sqsubr(z16.VnS(), p7.Merging(), z16.VnS(), z22.VnS()),
+ "sqsubr z16.s, p7/m, z16.s, z22.s");
+ COMPARE(suqadd(z26.VnB(), p2.Merging(), z26.VnB(), z28.VnB()),
+ "suqadd z26.b, p2/m, z26.b, z28.b");
+ COMPARE(suqadd(z26.VnD(), p2.Merging(), z26.VnD(), z28.VnD()),
+ "suqadd z26.d, p2/m, z26.d, z28.d");
+ COMPARE(suqadd(z26.VnH(), p2.Merging(), z26.VnH(), z28.VnH()),
+ "suqadd z26.h, p2/m, z26.h, z28.h");
+ COMPARE(suqadd(z26.VnS(), p2.Merging(), z26.VnS(), z28.VnS()),
+ "suqadd z26.s, p2/m, z26.s, z28.s");
+ COMPARE(usqadd(z25.VnB(), p4.Merging(), z25.VnB(), z6.VnB()),
+ "usqadd z25.b, p4/m, z25.b, z6.b");
+ COMPARE(usqadd(z25.VnD(), p4.Merging(), z25.VnD(), z6.VnD()),
+ "usqadd z25.d, p4/m, z25.d, z6.d");
+ COMPARE(usqadd(z25.VnH(), p4.Merging(), z25.VnH(), z6.VnH()),
+ "usqadd z25.h, p4/m, z25.h, z6.h");
+ COMPARE(usqadd(z25.VnS(), p4.Merging(), z25.VnS(), z6.VnS()),
+ "usqadd z25.s, p4/m, z25.s, z6.s");
+ COMPARE(uqadd(z24.VnB(), p7.Merging(), z24.VnB(), z1.VnB()),
+ "uqadd z24.b, p7/m, z24.b, z1.b");
+ COMPARE(uqadd(z24.VnD(), p7.Merging(), z24.VnD(), z1.VnD()),
+ "uqadd z24.d, p7/m, z24.d, z1.d");
+ COMPARE(uqadd(z24.VnH(), p7.Merging(), z24.VnH(), z1.VnH()),
+ "uqadd z24.h, p7/m, z24.h, z1.h");
+ COMPARE(uqadd(z24.VnS(), p7.Merging(), z24.VnS(), z1.VnS()),
+ "uqadd z24.s, p7/m, z24.s, z1.s");
+ COMPARE(uqsub(z10.VnB(), p3.Merging(), z10.VnB(), z1.VnB()),
+ "uqsub z10.b, p3/m, z10.b, z1.b");
+ COMPARE(uqsub(z10.VnD(), p3.Merging(), z10.VnD(), z1.VnD()),
+ "uqsub z10.d, p3/m, z10.d, z1.d");
+ COMPARE(uqsub(z10.VnH(), p3.Merging(), z10.VnH(), z1.VnH()),
+ "uqsub z10.h, p3/m, z10.h, z1.h");
+ COMPARE(uqsub(z10.VnS(), p3.Merging(), z10.VnS(), z1.VnS()),
+ "uqsub z10.s, p3/m, z10.s, z1.s");
+ COMPARE(uqsubr(z20.VnB(), p0.Merging(), z20.VnB(), z6.VnB()),
+ "uqsubr z20.b, p0/m, z20.b, z6.b");
+ COMPARE(uqsubr(z20.VnD(), p0.Merging(), z20.VnD(), z6.VnD()),
+ "uqsubr z20.d, p0/m, z20.d, z6.d");
+ COMPARE(uqsubr(z20.VnH(), p0.Merging(), z20.VnH(), z6.VnH()),
+ "uqsubr z20.h, p0/m, z20.h, z6.h");
+ COMPARE(uqsubr(z20.VnS(), p0.Merging(), z20.VnS(), z6.VnS()),
+ "uqsubr z20.s, p0/m, z20.s, z6.s");
+
+ COMPARE_MACRO(Sqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()),
+ "sqadd z28.b, p0/m, z28.b, z28.b");
+ COMPARE_MACRO(Sqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()),
+ "movprfx z29.b, p0/m, z28.b\n"
+ "sqadd z29.b, p0/m, z29.b, z3.b");
+ COMPARE_MACRO(Sqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()),
+ "sqadd z29.b, p0/m, z29.b, z28.b");
+ COMPARE_MACRO(Uqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()),
+ "uqadd z28.b, p0/m, z28.b, z28.b");
+ COMPARE_MACRO(Uqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()),
+ "movprfx z29.b, p0/m, z28.b\n"
+ "uqadd z29.b, p0/m, z29.b, z3.b");
+ COMPARE_MACRO(Uqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()),
+ "uqadd z29.b, p0/m, z29.b, z28.b");
+
+ COMPARE_MACRO(Sqsub(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()),
+ "sqsub z28.b, p0/m, z28.b, z28.b");
+ COMPARE_MACRO(Sqsub(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()),
+ "movprfx z29.b, p0/m, z28.b\n"
+ "sqsub z29.b, p0/m, z29.b, z3.b");
+ COMPARE_MACRO(Sqsub(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()),
+ "sqsubr z29.b, p0/m, z29.b, z28.b");
+ COMPARE_MACRO(Uqsub(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()),
+ "uqsub z28.b, p0/m, z28.b, z28.b");
+ COMPARE_MACRO(Uqsub(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()),
+ "movprfx z29.b, p0/m, z28.b\n"
+ "uqsub z29.b, p0/m, z29.b, z3.b");
+ COMPARE_MACRO(Uqsub(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()),
+ "uqsubr z29.b, p0/m, z29.b, z28.b");
+
+ COMPARE_MACRO(Suqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()),
+ "suqadd z28.b, p0/m, z28.b, z28.b");
+ COMPARE_MACRO(Suqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()),
+ "movprfx z29.b, p0/m, z28.b\n"
+ "suqadd z29.b, p0/m, z29.b, z3.b");
+ COMPARE_MACRO(Suqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()),
+ "mov z31.d, z29.d\n"
+ "movprfx z29.b, p0/m, z28.b\n"
+ "suqadd z29.b, p0/m, z29.b, z31.b");
+ COMPARE_MACRO(Usqadd(z28.VnB(), p0.Merging(), z28.VnB(), z28.VnB()),
+ "usqadd z28.b, p0/m, z28.b, z28.b");
+ COMPARE_MACRO(Usqadd(z29.VnB(), p0.Merging(), z28.VnB(), z3.VnB()),
+ "movprfx z29.b, p0/m, z28.b\n"
+ "usqadd z29.b, p0/m, z29.b, z3.b");
+ COMPARE_MACRO(Usqadd(z29.VnB(), p0.Merging(), z28.VnB(), z29.VnB()),
+ "mov z31.d, z29.d\n"
+ "movprfx z29.b, p0/m, z28.b\n"
+ "usqadd z29.b, p0/m, z29.b, z31.b");
+
+ CLEANUP();
+}
+
+TEST(sve2_pair_arith) {
+ SETUP();
+
+ COMPARE(addp(z3.VnB(), p1.Merging(), z3.VnB(), z0.VnB()),
+ "addp z3.b, p1/m, z3.b, z0.b");
+ COMPARE(addp(z3.VnD(), p1.Merging(), z3.VnD(), z0.VnD()),
+ "addp z3.d, p1/m, z3.d, z0.d");
+ COMPARE(addp(z3.VnH(), p1.Merging(), z3.VnH(), z0.VnH()),
+ "addp z3.h, p1/m, z3.h, z0.h");
+ COMPARE(addp(z3.VnS(), p1.Merging(), z3.VnS(), z0.VnS()),
+ "addp z3.s, p1/m, z3.s, z0.s");
+ COMPARE(smaxp(z5.VnB(), p4.Merging(), z5.VnB(), z10.VnB()),
+ "smaxp z5.b, p4/m, z5.b, z10.b");
+ COMPARE(smaxp(z5.VnD(), p4.Merging(), z5.VnD(), z10.VnD()),
+ "smaxp z5.d, p4/m, z5.d, z10.d");
+ COMPARE(smaxp(z5.VnH(), p4.Merging(), z5.VnH(), z10.VnH()),
+ "smaxp z5.h, p4/m, z5.h, z10.h");
+ COMPARE(smaxp(z5.VnS(), p4.Merging(), z5.VnS(), z10.VnS()),
+ "smaxp z5.s, p4/m, z5.s, z10.s");
+ COMPARE(sminp(z27.VnB(), p3.Merging(), z27.VnB(), z1.VnB()),
+ "sminp z27.b, p3/m, z27.b, z1.b");
+ COMPARE(sminp(z27.VnD(), p3.Merging(), z27.VnD(), z1.VnD()),
+ "sminp z27.d, p3/m, z27.d, z1.d");
+ COMPARE(sminp(z27.VnH(), p3.Merging(), z27.VnH(), z1.VnH()),
+ "sminp z27.h, p3/m, z27.h, z1.h");
+ COMPARE(sminp(z27.VnS(), p3.Merging(), z27.VnS(), z1.VnS()),
+ "sminp z27.s, p3/m, z27.s, z1.s");
+ COMPARE(umaxp(z7.VnB(), p2.Merging(), z7.VnB(), z23.VnB()),
+ "umaxp z7.b, p2/m, z7.b, z23.b");
+ COMPARE(umaxp(z7.VnD(), p2.Merging(), z7.VnD(), z23.VnD()),
+ "umaxp z7.d, p2/m, z7.d, z23.d");
+ COMPARE(umaxp(z7.VnH(), p2.Merging(), z7.VnH(), z23.VnH()),
+ "umaxp z7.h, p2/m, z7.h, z23.h");
+ COMPARE(umaxp(z7.VnS(), p2.Merging(), z7.VnS(), z23.VnS()),
+ "umaxp z7.s, p2/m, z7.s, z23.s");
+ COMPARE(uminp(z10.VnB(), p0.Merging(), z10.VnB(), z22.VnB()),
+ "uminp z10.b, p0/m, z10.b, z22.b");
+ COMPARE(uminp(z10.VnD(), p0.Merging(), z10.VnD(), z22.VnD()),
+ "uminp z10.d, p0/m, z10.d, z22.d");
+ COMPARE(uminp(z10.VnH(), p0.Merging(), z10.VnH(), z22.VnH()),
+ "uminp z10.h, p0/m, z10.h, z22.h");
+ COMPARE(uminp(z10.VnS(), p0.Merging(), z10.VnS(), z22.VnS()),
+ "uminp z10.s, p0/m, z10.s, z22.s");
+
+ COMPARE_MACRO(Addp(z3.VnB(), p1.Merging(), z3.VnB(), z3.VnB()),
+ "addp z3.b, p1/m, z3.b, z3.b");
+ COMPARE_MACRO(Addp(z4.VnB(), p1.Merging(), z3.VnB(), z3.VnB()),
+ "movprfx z4.b, p1/m, z3.b\n"
+ "addp z4.b, p1/m, z4.b, z3.b");
+ COMPARE_MACRO(Addp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()),
+ "mov z31.d, z4.d\n"
+ "movprfx z4.b, p1/m, z3.b\n"
+ "addp z4.b, p1/m, z4.b, z31.b");
+ COMPARE_MACRO(Smaxp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()),
+ "mov z31.d, z4.d\n"
+ "movprfx z4.b, p1/m, z3.b\n"
+ "smaxp z4.b, p1/m, z4.b, z31.b");
+ COMPARE_MACRO(Sminp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()),
+ "mov z31.d, z4.d\n"
+ "movprfx z4.b, p1/m, z3.b\n"
+ "sminp z4.b, p1/m, z4.b, z31.b");
+ COMPARE_MACRO(Umaxp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()),
+ "mov z31.d, z4.d\n"
+ "movprfx z4.b, p1/m, z3.b\n"
+ "umaxp z4.b, p1/m, z4.b, z31.b");
+ COMPARE_MACRO(Uminp(z4.VnB(), p1.Merging(), z3.VnB(), z4.VnB()),
+ "mov z31.d, z4.d\n"
+ "movprfx z4.b, p1/m, z3.b\n"
+ "uminp z4.b, p1/m, z4.b, z31.b");
+ CLEANUP();
+}
+
+// Check that the SVE2 saturating extract-narrow instructions (sqxtnb/t,
+// sqxtunb/t, uqxtnb/t) disassemble as expected for each narrowing size.
+TEST(sve2_extract_narrow) {
+  SETUP();
+
+  COMPARE(sqxtnb(z2.VnB(), z0.VnH()), "sqxtnb z2.b, z0.h");
+  COMPARE(sqxtnb(z2.VnH(), z0.VnS()), "sqxtnb z2.h, z0.s");
+  COMPARE(sqxtnb(z2.VnS(), z0.VnD()), "sqxtnb z2.s, z0.d");
+  COMPARE(sqxtnt(z31.VnB(), z18.VnH()), "sqxtnt z31.b, z18.h");
+  COMPARE(sqxtnt(z31.VnH(), z18.VnS()), "sqxtnt z31.h, z18.s");
+  COMPARE(sqxtnt(z31.VnS(), z18.VnD()), "sqxtnt z31.s, z18.d");
+  COMPARE(sqxtunb(z28.VnB(), z6.VnH()), "sqxtunb z28.b, z6.h");
+  COMPARE(sqxtunb(z28.VnH(), z6.VnS()), "sqxtunb z28.h, z6.s");
+  COMPARE(sqxtunb(z28.VnS(), z6.VnD()), "sqxtunb z28.s, z6.d");
+  COMPARE(sqxtunt(z14.VnB(), z31.VnH()), "sqxtunt z14.b, z31.h");
+  COMPARE(sqxtunt(z14.VnH(), z31.VnS()), "sqxtunt z14.h, z31.s");
+  COMPARE(sqxtunt(z14.VnS(), z31.VnD()), "sqxtunt z14.s, z31.d");
+  COMPARE(uqxtnb(z28.VnB(), z4.VnH()), "uqxtnb z28.b, z4.h");
+  COMPARE(uqxtnb(z28.VnH(), z4.VnS()), "uqxtnb z28.h, z4.s");
+  COMPARE(uqxtnb(z28.VnS(), z4.VnD()), "uqxtnb z28.s, z4.d");
+  COMPARE(uqxtnt(z19.VnB(), z7.VnH()), "uqxtnt z19.b, z7.h");
+  COMPARE(uqxtnt(z19.VnH(), z7.VnS()), "uqxtnt z19.h, z7.s");
+  COMPARE(uqxtnt(z19.VnS(), z7.VnD()), "uqxtnt z19.s, z7.d");
+
+  CLEANUP();
+}
+
+// Check disassembly of the SVE2 interleaving exclusive-or instructions
+// (eorbt, eortb) for all four element sizes.
+TEST(sve2_eorbt_eortb) {
+  SETUP();
+
+  COMPARE(eorbt(z3.VnB(), z10.VnB(), z8.VnB()), "eorbt z3.b, z10.b, z8.b");
+  COMPARE(eorbt(z3.VnD(), z10.VnD(), z8.VnD()), "eorbt z3.d, z10.d, z8.d");
+  COMPARE(eorbt(z3.VnH(), z10.VnH(), z8.VnH()), "eorbt z3.h, z10.h, z8.h");
+  COMPARE(eorbt(z3.VnS(), z10.VnS(), z8.VnS()), "eorbt z3.s, z10.s, z8.s");
+  COMPARE(eortb(z21.VnB(), z21.VnB(), z15.VnB()), "eortb z21.b, z21.b, z15.b");
+  COMPARE(eortb(z21.VnD(), z21.VnD(), z15.VnD()), "eortb z21.d, z21.d, z15.d");
+  COMPARE(eortb(z21.VnH(), z21.VnH(), z15.VnH()), "eortb z21.h, z21.h, z15.h");
+  COMPARE(eortb(z21.VnS(), z21.VnS(), z15.VnS()), "eortb z21.s, z21.s, z15.s");
+
+  CLEANUP();
+}
+
+// Check disassembly of sqrdmlah/sqrdmlsh in vector and indexed forms, plus
+// the MacroAssembler expansions when the destination aliases a source
+// operand (movprfx into the scratch register z31, then a final mov back).
+TEST(sve2_saturating_multiply_add_high) {
+  SETUP();
+
+  COMPARE(sqrdmlah(z27.VnB(), z28.VnB(), z19.VnB()),
+          "sqrdmlah z27.b, z28.b, z19.b");
+  COMPARE(sqrdmlah(z27.VnD(), z28.VnD(), z19.VnD()),
+          "sqrdmlah z27.d, z28.d, z19.d");
+  COMPARE(sqrdmlah(z27.VnH(), z28.VnH(), z19.VnH()),
+          "sqrdmlah z27.h, z28.h, z19.h");
+  COMPARE(sqrdmlah(z27.VnS(), z28.VnS(), z19.VnS()),
+          "sqrdmlah z27.s, z28.s, z19.s");
+  COMPARE(sqrdmlsh(z11.VnB(), z16.VnB(), z31.VnB()),
+          "sqrdmlsh z11.b, z16.b, z31.b");
+  COMPARE(sqrdmlsh(z11.VnD(), z16.VnD(), z31.VnD()),
+          "sqrdmlsh z11.d, z16.d, z31.d");
+  COMPARE(sqrdmlsh(z11.VnH(), z16.VnH(), z31.VnH()),
+          "sqrdmlsh z11.h, z16.h, z31.h");
+  COMPARE(sqrdmlsh(z11.VnS(), z16.VnS(), z31.VnS()),
+          "sqrdmlsh z11.s, z16.s, z31.s");
+
+  // Destination aliases a multiplicand: expect a movprfx/mov sequence.
+  COMPARE_MACRO(Sqrdmlah(z29.VnD(), z0.VnD(), z29.VnD(), z26.VnD()),
+                "movprfx z31, z0\n"
+                "sqrdmlah z31.d, z29.d, z26.d\n"
+                "mov z29.d, z31.d");
+  COMPARE_MACRO(Sqrdmlah(z26.VnH(), z0.VnH(), z29.VnH(), z26.VnH()),
+                "movprfx z31, z0\n"
+                "sqrdmlah z31.h, z29.h, z26.h\n"
+                "mov z26.d, z31.d");
+  COMPARE_MACRO(Sqrdmlsh(z23.VnS(), z31.VnS(), z26.VnS(), z29.VnS()),
+                "movprfx z23, z31\n"
+                "sqrdmlsh z23.s, z26.s, z29.s");
+  COMPARE_MACRO(Sqrdmlsh(z4.VnB(), z31.VnB(), z4.VnB(), z4.VnB()),
+                "sqrdmlsh z31.b, z4.b, z4.b\n"
+                "mov z4.d, z31.d");
+
+  // Indexed (element) forms.
+  COMPARE(sqrdmlah(z10.VnD(), z30.VnD(), z11.VnD(), 1),
+          "sqrdmlah z10.d, z30.d, z11.d[1]");
+  COMPARE(sqrdmlah(z11.VnH(), z8.VnH(), z3.VnH(), 7),
+          "sqrdmlah z11.h, z8.h, z3.h[7]");
+  COMPARE(sqrdmlah(z21.VnS(), z29.VnS(), z7.VnS(), 3),
+          "sqrdmlah z21.s, z29.s, z7.s[3]");
+  COMPARE(sqrdmlsh(z2.VnD(), z16.VnD(), z14.VnD(), 0),
+          "sqrdmlsh z2.d, z16.d, z14.d[0]");
+  COMPARE(sqrdmlsh(z23.VnH(), z13.VnH(), z6.VnH(), 5),
+          "sqrdmlsh z23.h, z13.h, z6.h[5]");
+  COMPARE(sqrdmlsh(z27.VnS(), z8.VnS(), z4.VnS(), 2),
+          "sqrdmlsh z27.s, z8.s, z4.s[2]");
+
+  // Indexed forms with aliasing destinations.
+  COMPARE_MACRO(Sqrdmlah(z24.VnD(), z0.VnD(), z24.VnD(), z13.VnD(), 0),
+                "movprfx z31, z0\n"
+                "sqrdmlah z31.d, z24.d, z13.d[0]\n"
+                "mov z24.d, z31.d");
+  COMPARE_MACRO(Sqrdmlah(z4.VnH(), z0.VnH(), z29.VnH(), z4.VnH(), 6),
+                "movprfx z31, z0\n"
+                "sqrdmlah z31.h, z29.h, z4.h[6]\n"
+                "mov z4.d, z31.d");
+  COMPARE_MACRO(Sqrdmlsh(z12.VnS(), z31.VnS(), z26.VnS(), z2.VnS(), 2),
+                "movprfx z12, z31\n"
+                "sqrdmlsh z12.s, z26.s, z2.s[2]");
+  COMPARE_MACRO(Sqrdmlsh(z0.VnD(), z31.VnD(), z0.VnD(), z0.VnD(), 1),
+                "sqrdmlsh z31.d, z0.d, z0.d[1]\n"
+                "mov z0.d, z31.d");
+
+  CLEANUP();
+}
+
+// Check disassembly of the predicated pairwise add-accumulate-long
+// instructions (sadalp, uadalp) for each widening size.
+TEST(sve2_integer_pairwise_add_accumulate_long) {
+  SETUP();
+
+  COMPARE(sadalp(z19.VnD(), p5.Merging(), z9.VnS()),
+          "sadalp z19.d, p5/m, z9.s");
+  COMPARE(sadalp(z19.VnH(), p5.Merging(), z9.VnB()),
+          "sadalp z19.h, p5/m, z9.b");
+  COMPARE(sadalp(z19.VnS(), p5.Merging(), z9.VnH()),
+          "sadalp z19.s, p5/m, z9.h");
+  COMPARE(uadalp(z20.VnD(), p4.Merging(), z5.VnS()),
+          "uadalp z20.d, p4/m, z5.s");
+  COMPARE(uadalp(z20.VnH(), p4.Merging(), z5.VnB()),
+          "uadalp z20.h, p4/m, z5.b");
+  COMPARE(uadalp(z20.VnS(), p4.Merging(), z5.VnH()),
+          "uadalp z20.s, p4/m, z5.h");
+
+  CLEANUP();
+}
+
+// Check disassembly of the unpredicated integer multiplies: mul, pmul
+// (byte only), smulh and umulh.
+TEST(sve2_integer_multiply_vectors_unpredicated) {
+  SETUP();
+
+  COMPARE(mul(z23.VnB(), z0.VnB(), z12.VnB()), "mul z23.b, z0.b, z12.b");
+  COMPARE(mul(z24.VnD(), z1.VnD(), z14.VnD()), "mul z24.d, z1.d, z14.d");
+  COMPARE(mul(z25.VnH(), z2.VnH(), z16.VnH()), "mul z25.h, z2.h, z16.h");
+  COMPARE(mul(z26.VnS(), z3.VnS(), z18.VnS()), "mul z26.s, z3.s, z18.s");
+
+  COMPARE(pmul(z0.VnB(), z5.VnB(), z5.VnB()), "pmul z0.b, z5.b, z5.b");
+
+  COMPARE(smulh(z11.VnB(), z9.VnB(), z1.VnB()), "smulh z11.b, z9.b, z1.b");
+  COMPARE(smulh(z21.VnD(), z19.VnD(), z16.VnD()), "smulh z21.d, z19.d, z16.d");
+  COMPARE(smulh(z11.VnH(), z9.VnH(), z1.VnH()), "smulh z11.h, z9.h, z1.h");
+  COMPARE(smulh(z21.VnS(), z19.VnS(), z16.VnS()), "smulh z21.s, z19.s, z16.s");
+
+  COMPARE(umulh(z5.VnB(), z9.VnB(), z5.VnB()), "umulh z5.b, z9.b, z5.b");
+  COMPARE(umulh(z18.VnD(), z9.VnD(), z5.VnD()), "umulh z18.d, z9.d, z5.d");
+  COMPARE(umulh(z18.VnH(), z9.VnH(), z9.VnH()), "umulh z18.h, z9.h, z9.h");
+  COMPARE(umulh(z18.VnS(), z9.VnS(), z18.VnS()), "umulh z18.s, z9.s, z18.s");
+
+  CLEANUP();
+}
+
+// Check disassembly of the interleaved long arithmetic instructions
+// (saddlbt, ssublbt, ssubltb) for each widening size.
+TEST(sve2_arith_interleaved_long) {
+  SETUP();
+
+  COMPARE(saddlbt(z15.VnD(), z6.VnS(), z18.VnS()),
+          "saddlbt z15.d, z6.s, z18.s");
+  COMPARE(saddlbt(z15.VnH(), z6.VnB(), z18.VnB()),
+          "saddlbt z15.h, z6.b, z18.b");
+  COMPARE(saddlbt(z15.VnS(), z6.VnH(), z18.VnH()),
+          "saddlbt z15.s, z6.h, z18.h");
+  COMPARE(ssublbt(z6.VnD(), z28.VnS(), z12.VnS()),
+          "ssublbt z6.d, z28.s, z12.s");
+  COMPARE(ssublbt(z6.VnH(), z28.VnB(), z12.VnB()),
+          "ssublbt z6.h, z28.b, z12.b");
+  COMPARE(ssublbt(z6.VnS(), z28.VnH(), z12.VnH()),
+          "ssublbt z6.s, z28.h, z12.h");
+  COMPARE(ssubltb(z11.VnD(), z18.VnS(), z19.VnS()),
+          "ssubltb z11.d, z18.s, z19.s");
+  COMPARE(ssubltb(z11.VnH(), z18.VnB(), z19.VnB()),
+          "ssubltb z11.h, z18.b, z19.b");
+  COMPARE(ssubltb(z11.VnS(), z18.VnH(), z19.VnH()),
+          "ssubltb z11.s, z18.h, z19.h");
+
+  CLEANUP();
+}
+
+// Check disassembly of the predicated unary instructions (sqabs, sqneg,
+// urecpe, ursqrte). The zeroing-predicate macro forms expand to a movprfx
+// of the destination followed by the merging instruction.
+TEST(sve2_int_unary_predicated) {
+  SETUP();
+
+  COMPARE_MACRO(Sqabs(z29.VnB(), p1.Merging(), z18.VnB()),
+                "sqabs z29.b, p1/m, z18.b");
+  COMPARE_MACRO(Sqabs(z29.VnD(), p1.Merging(), z18.VnD()),
+                "sqabs z29.d, p1/m, z18.d");
+  COMPARE_MACRO(Sqabs(z29.VnH(), p1.Merging(), z18.VnH()),
+                "sqabs z29.h, p1/m, z18.h");
+  COMPARE_MACRO(Sqabs(z29.VnS(), p1.Merging(), z18.VnS()),
+                "sqabs z29.s, p1/m, z18.s");
+  COMPARE_MACRO(Sqneg(z21.VnB(), p0.Merging(), z17.VnB()),
+                "sqneg z21.b, p0/m, z17.b");
+  COMPARE_MACRO(Sqneg(z21.VnD(), p0.Merging(), z17.VnD()),
+                "sqneg z21.d, p0/m, z17.d");
+  COMPARE_MACRO(Sqneg(z21.VnH(), p0.Merging(), z17.VnH()),
+                "sqneg z21.h, p0/m, z17.h");
+  COMPARE_MACRO(Sqneg(z21.VnS(), p0.Merging(), z17.VnS()),
+                "sqneg z21.s, p0/m, z17.s");
+  COMPARE_MACRO(Urecpe(z25.VnS(), p7.Merging(), z2.VnS()),
+                "urecpe z25.s, p7/m, z2.s");
+  COMPARE_MACRO(Ursqrte(z4.VnS(), p3.Merging(), z3.VnS()),
+                "ursqrte z4.s, p3/m, z3.s");
+
+  // Zeroing forms: movprfx zeroes the inactive lanes first.
+  COMPARE_MACRO(Sqabs(z29.VnS(), p1.Zeroing(), z18.VnS()),
+                "movprfx z29.s, p1/z, z29.s\n"
+                "sqabs z29.s, p1/m, z18.s");
+  COMPARE_MACRO(Sqneg(z21.VnB(), p0.Zeroing(), z17.VnB()),
+                "movprfx z21.b, p0/z, z21.b\n"
+                "sqneg z21.b, p0/m, z17.b");
+  COMPARE_MACRO(Urecpe(z25.VnS(), p7.Zeroing(), z2.VnS()),
+                "movprfx z25.s, p7/z, z25.s\n"
+                "urecpe z25.s, p7/m, z2.s");
+  COMPARE_MACRO(Ursqrte(z4.VnS(), p3.Zeroing(), z3.VnS()),
+                "movprfx z4.s, p3/z, z4.s\n"
+                "ursqrte z4.s, p3/m, z3.s");
+  CLEANUP();
+}
+
+// Check disassembly of the widening (long) arithmetic instructions:
+// s/u abdl, addl and subl in bottom (b) and top (t) forms, for each
+// widening element size.
+TEST(sve2_arith_long) {
+  SETUP();
+
+  COMPARE_MACRO(Sabdlb(z2.VnD(), z21.VnS(), z3.VnS()),
+                "sabdlb z2.d, z21.s, z3.s");
+  COMPARE_MACRO(Sabdlb(z2.VnH(), z21.VnB(), z3.VnB()),
+                "sabdlb z2.h, z21.b, z3.b");
+  COMPARE_MACRO(Sabdlb(z2.VnS(), z21.VnH(), z3.VnH()),
+                "sabdlb z2.s, z21.h, z3.h");
+  COMPARE_MACRO(Sabdlt(z25.VnD(), z23.VnS(), z17.VnS()),
+                "sabdlt z25.d, z23.s, z17.s");
+  COMPARE_MACRO(Sabdlt(z25.VnH(), z23.VnB(), z17.VnB()),
+                "sabdlt z25.h, z23.b, z17.b");
+  COMPARE_MACRO(Sabdlt(z25.VnS(), z23.VnH(), z17.VnH()),
+                "sabdlt z25.s, z23.h, z17.h");
+  COMPARE_MACRO(Saddlb(z24.VnD(), z30.VnS(), z16.VnS()),
+                "saddlb z24.d, z30.s, z16.s");
+  COMPARE_MACRO(Saddlb(z24.VnH(), z30.VnB(), z16.VnB()),
+                "saddlb z24.h, z30.b, z16.b");
+  COMPARE_MACRO(Saddlb(z24.VnS(), z30.VnH(), z16.VnH()),
+                "saddlb z24.s, z30.h, z16.h");
+  COMPARE_MACRO(Saddlt(z21.VnD(), z29.VnS(), z31.VnS()),
+                "saddlt z21.d, z29.s, z31.s");
+  COMPARE_MACRO(Saddlt(z21.VnH(), z29.VnB(), z31.VnB()),
+                "saddlt z21.h, z29.b, z31.b");
+  COMPARE_MACRO(Saddlt(z21.VnS(), z29.VnH(), z31.VnH()),
+                "saddlt z21.s, z29.h, z31.h");
+  COMPARE_MACRO(Ssublb(z4.VnD(), z23.VnS(), z7.VnS()),
+                "ssublb z4.d, z23.s, z7.s");
+  COMPARE_MACRO(Ssublb(z4.VnH(), z23.VnB(), z7.VnB()),
+                "ssublb z4.h, z23.b, z7.b");
+  COMPARE_MACRO(Ssublb(z4.VnS(), z23.VnH(), z7.VnH()),
+                "ssublb z4.s, z23.h, z7.h");
+  COMPARE_MACRO(Ssublt(z12.VnD(), z13.VnS(), z6.VnS()),
+                "ssublt z12.d, z13.s, z6.s");
+  COMPARE_MACRO(Ssublt(z12.VnH(), z13.VnB(), z6.VnB()),
+                "ssublt z12.h, z13.b, z6.b");
+  COMPARE_MACRO(Ssublt(z12.VnS(), z13.VnH(), z6.VnH()),
+                "ssublt z12.s, z13.h, z6.h");
+  COMPARE_MACRO(Uabdlb(z1.VnD(), z26.VnS(), z12.VnS()),
+                "uabdlb z1.d, z26.s, z12.s");
+  COMPARE_MACRO(Uabdlb(z1.VnH(), z26.VnB(), z12.VnB()),
+                "uabdlb z1.h, z26.b, z12.b");
+  COMPARE_MACRO(Uabdlb(z1.VnS(), z26.VnH(), z12.VnH()),
+                "uabdlb z1.s, z26.h, z12.h");
+  COMPARE_MACRO(Uabdlt(z25.VnD(), z29.VnS(), z14.VnS()),
+                "uabdlt z25.d, z29.s, z14.s");
+  COMPARE_MACRO(Uabdlt(z25.VnH(), z29.VnB(), z14.VnB()),
+                "uabdlt z25.h, z29.b, z14.b");
+  COMPARE_MACRO(Uabdlt(z25.VnS(), z29.VnH(), z14.VnH()),
+                "uabdlt z25.s, z29.h, z14.h");
+  COMPARE_MACRO(Uaddlb(z3.VnD(), z5.VnS(), z2.VnS()),
+                "uaddlb z3.d, z5.s, z2.s");
+  COMPARE_MACRO(Uaddlb(z3.VnH(), z5.VnB(), z2.VnB()),
+                "uaddlb z3.h, z5.b, z2.b");
+  COMPARE_MACRO(Uaddlb(z3.VnS(), z5.VnH(), z2.VnH()),
+                "uaddlb z3.s, z5.h, z2.h");
+  COMPARE_MACRO(Uaddlt(z15.VnD(), z28.VnS(), z20.VnS()),
+                "uaddlt z15.d, z28.s, z20.s");
+  COMPARE_MACRO(Uaddlt(z15.VnH(), z28.VnB(), z20.VnB()),
+                "uaddlt z15.h, z28.b, z20.b");
+  COMPARE_MACRO(Uaddlt(z15.VnS(), z28.VnH(), z20.VnH()),
+                "uaddlt z15.s, z28.h, z20.h");
+  COMPARE_MACRO(Usublb(z25.VnD(), z9.VnS(), z17.VnS()),
+                "usublb z25.d, z9.s, z17.s");
+  COMPARE_MACRO(Usublb(z25.VnH(), z9.VnB(), z17.VnB()),
+                "usublb z25.h, z9.b, z17.b");
+  COMPARE_MACRO(Usublb(z25.VnS(), z9.VnH(), z17.VnH()),
+                "usublb z25.s, z9.h, z17.h");
+  COMPARE_MACRO(Usublt(z5.VnD(), z11.VnS(), z15.VnS()),
+                "usublt z5.d, z11.s, z15.s");
+  COMPARE_MACRO(Usublt(z5.VnH(), z11.VnB(), z15.VnB()),
+                "usublt z5.h, z11.b, z15.b");
+  COMPARE_MACRO(Usublt(z5.VnS(), z11.VnH(), z15.VnH()),
+                "usublt z5.s, z11.h, z15.h");
+
+  CLEANUP();
+}
+
+// Check disassembly of the wide arithmetic instructions (s/u addw, subw in
+// bottom and top forms), where only the second operand is narrow.
+TEST(sve2_arith_wide) {
+  SETUP();
+
+  COMPARE_MACRO(Saddwb(z12.VnD(), z8.VnD(), z8.VnS()),
+                "saddwb z12.d, z8.d, z8.s");
+  COMPARE_MACRO(Saddwb(z12.VnH(), z8.VnH(), z8.VnB()),
+                "saddwb z12.h, z8.h, z8.b");
+  COMPARE_MACRO(Saddwb(z12.VnS(), z8.VnS(), z8.VnH()),
+                "saddwb z12.s, z8.s, z8.h");
+  COMPARE_MACRO(Saddwt(z24.VnD(), z0.VnD(), z3.VnS()),
+                "saddwt z24.d, z0.d, z3.s");
+  COMPARE_MACRO(Saddwt(z24.VnH(), z0.VnH(), z3.VnB()),
+                "saddwt z24.h, z0.h, z3.b");
+  COMPARE_MACRO(Saddwt(z24.VnS(), z0.VnS(), z3.VnH()),
+                "saddwt z24.s, z0.s, z3.h");
+  COMPARE_MACRO(Ssubwb(z7.VnD(), z28.VnD(), z11.VnS()),
+                "ssubwb z7.d, z28.d, z11.s");
+  COMPARE_MACRO(Ssubwb(z7.VnH(), z28.VnH(), z11.VnB()),
+                "ssubwb z7.h, z28.h, z11.b");
+  COMPARE_MACRO(Ssubwb(z7.VnS(), z28.VnS(), z11.VnH()),
+                "ssubwb z7.s, z28.s, z11.h");
+  COMPARE_MACRO(Ssubwt(z29.VnD(), z25.VnD(), z20.VnS()),
+                "ssubwt z29.d, z25.d, z20.s");
+  COMPARE_MACRO(Ssubwt(z29.VnH(), z25.VnH(), z20.VnB()),
+                "ssubwt z29.h, z25.h, z20.b");
+  COMPARE_MACRO(Ssubwt(z29.VnS(), z25.VnS(), z20.VnH()),
+                "ssubwt z29.s, z25.s, z20.h");
+  COMPARE_MACRO(Uaddwb(z31.VnD(), z8.VnD(), z25.VnS()),
+                "uaddwb z31.d, z8.d, z25.s");
+  COMPARE_MACRO(Uaddwb(z31.VnH(), z8.VnH(), z25.VnB()),
+                "uaddwb z31.h, z8.h, z25.b");
+  COMPARE_MACRO(Uaddwb(z31.VnS(), z8.VnS(), z25.VnH()),
+                "uaddwb z31.s, z8.s, z25.h");
+  COMPARE_MACRO(Uaddwt(z17.VnD(), z15.VnD(), z2.VnS()),
+                "uaddwt z17.d, z15.d, z2.s");
+  COMPARE_MACRO(Uaddwt(z17.VnH(), z15.VnH(), z2.VnB()),
+                "uaddwt z17.h, z15.h, z2.b");
+  COMPARE_MACRO(Uaddwt(z17.VnS(), z15.VnS(), z2.VnH()),
+                "uaddwt z17.s, z15.s, z2.h");
+  COMPARE_MACRO(Usubwb(z10.VnD(), z13.VnD(), z20.VnS()),
+                "usubwb z10.d, z13.d, z20.s");
+  COMPARE_MACRO(Usubwb(z10.VnH(), z13.VnH(), z20.VnB()),
+                "usubwb z10.h, z13.h, z20.b");
+  COMPARE_MACRO(Usubwb(z10.VnS(), z13.VnS(), z20.VnH()),
+                "usubwb z10.s, z13.s, z20.h");
+  COMPARE_MACRO(Usubwt(z15.VnD(), z8.VnD(), z23.VnS()),
+                "usubwt z15.d, z8.d, z23.s");
+  COMPARE_MACRO(Usubwt(z15.VnH(), z8.VnH(), z23.VnB()),
+                "usubwt z15.h, z8.h, z23.b");
+  COMPARE_MACRO(Usubwt(z15.VnS(), z8.VnS(), z23.VnH()),
+                "usubwt z15.s, z8.s, z23.h");
+
+  CLEANUP();
+}
+
+// Check disassembly of the shift-left-long instructions (s/u shllb/t),
+// covering the minimum and maximum immediate for each source size.
+TEST(sve2_shift_long) {
+  SETUP();
+
+  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 0), "sshllb z2.h, z20.b, #0");
+  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 1), "sshllb z2.h, z20.b, #1");
+  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 5), "sshllb z2.h, z20.b, #5");
+  COMPARE_MACRO(Sshllb(z2.VnH(), z20.VnB(), 7), "sshllb z2.h, z20.b, #7");
+  COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 0), "sshllb z2.s, z20.h, #0");
+  COMPARE_MACRO(Sshllb(z2.VnS(), z20.VnH(), 15), "sshllb z2.s, z20.h, #15");
+  COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 0), "sshllb z2.d, z20.s, #0");
+  COMPARE_MACRO(Sshllb(z2.VnD(), z20.VnS(), 31), "sshllb z2.d, z20.s, #31");
+  COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 0), "sshllt z27.h, z8.b, #0");
+  COMPARE_MACRO(Sshllt(z27.VnH(), z8.VnB(), 7), "sshllt z27.h, z8.b, #7");
+  COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 0), "sshllt z27.s, z8.h, #0");
+  COMPARE_MACRO(Sshllt(z27.VnS(), z8.VnH(), 15), "sshllt z27.s, z8.h, #15");
+  COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 0), "sshllt z27.d, z8.s, #0");
+  COMPARE_MACRO(Sshllt(z27.VnD(), z8.VnS(), 31), "sshllt z27.d, z8.s, #31");
+  COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 0), "ushllb z8.h, z31.b, #0");
+  COMPARE_MACRO(Ushllb(z8.VnH(), z31.VnB(), 7), "ushllb z8.h, z31.b, #7");
+  COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 0), "ushllb z8.s, z31.h, #0");
+  COMPARE_MACRO(Ushllb(z8.VnS(), z31.VnH(), 15), "ushllb z8.s, z31.h, #15");
+  COMPARE_MACRO(Ushllb(z8.VnD(), z31.VnS(), 0), "ushllb z8.d, z31.s, #0");
+  COMPARE_MACRO(Ushllb(z8.VnD(), z31.VnS(), 31), "ushllb z8.d, z31.s, #31");
+  COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 0), "ushllt z3.h, z21.b, #0");
+  COMPARE_MACRO(Ushllt(z3.VnH(), z21.VnB(), 7), "ushllt z3.h, z21.b, #7");
+  COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 0), "ushllt z3.s, z21.h, #0");
+  COMPARE_MACRO(Ushllt(z3.VnS(), z21.VnH(), 15), "ushllt z3.s, z21.h, #15");
+  COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 0), "ushllt z3.d, z21.s, #0");
+  COMPARE_MACRO(Ushllt(z3.VnD(), z21.VnS(), 31), "ushllt z3.d, z21.s, #31");
+
+  CLEANUP();
+}
+
+// Check disassembly of the shift-right-narrow family: shrn, rshrn,
+// sq(r)shrn, uq(r)shrn and sq(r)shrun, in bottom and top forms, covering
+// boundary shift immediates for each narrowing size.
+TEST(sve2_shift_narrow) {
+  SETUP();
+
+  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 1), "shrnb z7.b, z4.h, #1");
+  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 2), "shrnb z7.b, z4.h, #2");
+  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 5), "shrnb z7.b, z4.h, #5");
+  COMPARE_MACRO(Shrnb(z7.VnB(), z4.VnH(), 8), "shrnb z7.b, z4.h, #8");
+  COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 1), "shrnb z7.h, z4.s, #1");
+  COMPARE_MACRO(Shrnb(z7.VnH(), z4.VnS(), 16), "shrnb z7.h, z4.s, #16");
+  COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 1), "shrnb z7.s, z4.d, #1");
+  COMPARE_MACRO(Shrnb(z7.VnS(), z4.VnD(), 32), "shrnb z7.s, z4.d, #32");
+  COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 1), "shrnt z21.b, z29.h, #1");
+  COMPARE_MACRO(Shrnt(z21.VnB(), z29.VnH(), 8), "shrnt z21.b, z29.h, #8");
+  COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 1), "shrnt z21.h, z29.s, #1");
+  COMPARE_MACRO(Shrnt(z21.VnH(), z29.VnS(), 16), "shrnt z21.h, z29.s, #16");
+  COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 1), "shrnt z21.s, z29.d, #1");
+  COMPARE_MACRO(Shrnt(z21.VnS(), z29.VnD(), 32), "shrnt z21.s, z29.d, #32");
+
+  COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 1), "rshrnb z5.b, z1.h, #1");
+  COMPARE_MACRO(Rshrnb(z5.VnB(), z1.VnH(), 8), "rshrnb z5.b, z1.h, #8");
+  COMPARE_MACRO(Rshrnb(z5.VnH(), z1.VnS(), 16), "rshrnb z5.h, z1.s, #16");
+  COMPARE_MACRO(Rshrnb(z5.VnS(), z1.VnD(), 32), "rshrnb z5.s, z1.d, #32");
+  COMPARE_MACRO(Rshrnt(z5.VnB(), z1.VnH(), 8), "rshrnt z5.b, z1.h, #8");
+  COMPARE_MACRO(Rshrnt(z5.VnH(), z1.VnS(), 16), "rshrnt z5.h, z1.s, #16");
+  COMPARE_MACRO(Rshrnt(z5.VnS(), z1.VnD(), 32), "rshrnt z5.s, z1.d, #32");
+
+  COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 1), "sqrshrnb z1.b, z1.h, #1");
+  COMPARE_MACRO(Sqrshrnb(z1.VnB(), z1.VnH(), 8), "sqrshrnb z1.b, z1.h, #8");
+  COMPARE_MACRO(Sqrshrnb(z1.VnH(), z1.VnS(), 16), "sqrshrnb z1.h, z1.s, #16");
+  COMPARE_MACRO(Sqrshrnb(z1.VnS(), z1.VnD(), 32), "sqrshrnb z1.s, z1.d, #32");
+  COMPARE_MACRO(Sqrshrnt(z24.VnB(), z19.VnH(), 8), "sqrshrnt z24.b, z19.h, #8");
+  COMPARE_MACRO(Sqrshrnt(z24.VnH(), z19.VnS(), 16),
+                "sqrshrnt z24.h, z19.s, #16");
+  COMPARE_MACRO(Sqrshrnt(z24.VnS(), z19.VnD(), 32),
+                "sqrshrnt z24.s, z19.d, #32");
+
+  COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 1), "sqshrnb z25.b, z1.h, #1");
+  COMPARE_MACRO(Sqshrnb(z25.VnB(), z1.VnH(), 8), "sqshrnb z25.b, z1.h, #8");
+  COMPARE_MACRO(Sqshrnb(z25.VnH(), z1.VnS(), 16), "sqshrnb z25.h, z1.s, #16");
+  COMPARE_MACRO(Sqshrnb(z25.VnS(), z1.VnD(), 32), "sqshrnb z25.s, z1.d, #32");
+  COMPARE_MACRO(Sqshrnt(z0.VnB(), z25.VnH(), 8), "sqshrnt z0.b, z25.h, #8");
+  COMPARE_MACRO(Sqshrnt(z0.VnH(), z25.VnS(), 16), "sqshrnt z0.h, z25.s, #16");
+  COMPARE_MACRO(Sqshrnt(z0.VnS(), z25.VnD(), 32), "sqshrnt z0.s, z25.d, #32");
+
+  COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 1), "uqrshrnb z30.b, z25.h, #1");
+  COMPARE_MACRO(Uqrshrnb(z30.VnB(), z25.VnH(), 8), "uqrshrnb z30.b, z25.h, #8");
+  COMPARE_MACRO(Uqrshrnb(z30.VnH(), z25.VnS(), 16),
+                "uqrshrnb z30.h, z25.s, #16");
+  COMPARE_MACRO(Uqrshrnb(z30.VnS(), z25.VnD(), 32),
+                "uqrshrnb z30.s, z25.d, #32");
+  COMPARE_MACRO(Uqrshrnt(z3.VnB(), z25.VnH(), 8), "uqrshrnt z3.b, z25.h, #8");
+  COMPARE_MACRO(Uqrshrnt(z3.VnH(), z25.VnS(), 16), "uqrshrnt z3.h, z25.s, #16");
+  COMPARE_MACRO(Uqrshrnt(z3.VnS(), z25.VnD(), 32), "uqrshrnt z3.s, z25.d, #32");
+
+  COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 1), "uqshrnb z17.b, z4.h, #1");
+  COMPARE_MACRO(Uqshrnb(z17.VnB(), z4.VnH(), 8), "uqshrnb z17.b, z4.h, #8");
+  COMPARE_MACRO(Uqshrnb(z17.VnH(), z4.VnS(), 16), "uqshrnb z17.h, z4.s, #16");
+  COMPARE_MACRO(Uqshrnb(z17.VnS(), z4.VnD(), 32), "uqshrnb z17.s, z4.d, #32");
+  COMPARE_MACRO(Uqshrnt(z28.VnB(), z18.VnH(), 8), "uqshrnt z28.b, z18.h, #8");
+  COMPARE_MACRO(Uqshrnt(z28.VnH(), z18.VnS(), 16), "uqshrnt z28.h, z18.s, #16");
+  COMPARE_MACRO(Uqshrnt(z28.VnS(), z18.VnD(), 32), "uqshrnt z28.s, z18.d, #32");
+
+  COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 1),
+                "sqrshrunb z23.b, z28.h, #1");
+  COMPARE_MACRO(Sqrshrunb(z23.VnB(), z28.VnH(), 8),
+                "sqrshrunb z23.b, z28.h, #8");
+  COMPARE_MACRO(Sqrshrunb(z23.VnH(), z28.VnS(), 16),
+                "sqrshrunb z23.h, z28.s, #16");
+  COMPARE_MACRO(Sqrshrunb(z23.VnS(), z28.VnD(), 32),
+                "sqrshrunb z23.s, z28.d, #32");
+  COMPARE_MACRO(Sqrshrunt(z9.VnB(), z15.VnH(), 8), "sqrshrunt z9.b, z15.h, #8");
+  COMPARE_MACRO(Sqrshrunt(z9.VnH(), z15.VnS(), 16),
+                "sqrshrunt z9.h, z15.s, #16");
+  COMPARE_MACRO(Sqrshrunt(z9.VnS(), z15.VnD(), 32),
+                "sqrshrunt z9.s, z15.d, #32");
+
+  COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 1), "sqshrunb z25.b, z10.h, #1");
+  COMPARE_MACRO(Sqshrunb(z25.VnB(), z10.VnH(), 8), "sqshrunb z25.b, z10.h, #8");
+  COMPARE_MACRO(Sqshrunb(z25.VnH(), z10.VnS(), 16),
+                "sqshrunb z25.h, z10.s, #16");
+  COMPARE_MACRO(Sqshrunb(z25.VnS(), z10.VnD(), 32),
+                "sqshrunb z25.s, z10.d, #32");
+  COMPARE_MACRO(Sqshrunt(z20.VnB(), z3.VnH(), 8), "sqshrunt z20.b, z3.h, #8");
+  COMPARE_MACRO(Sqshrunt(z20.VnH(), z3.VnS(), 16), "sqshrunt z20.h, z3.s, #16");
+  COMPARE_MACRO(Sqshrunt(z20.VnS(), z3.VnD(), 32), "sqshrunt z20.s, z3.d, #32");
+
+  CLEANUP();
+}
+
+// Check disassembly of the absolute-difference-accumulate-long instructions
+// (s/u abalb/t), including the MacroAssembler four-operand forms: no-op
+// when the result equals the accumulator and all inputs alias, movprfx when
+// the destination differs from the accumulator, and a scratch-register
+// (z31) sequence when the destination aliases a multiplicand.
+TEST(sve2_aba_long) {
+  SETUP();
+
+  COMPARE(sabalb(z13.VnD(), z20.VnS(), z26.VnS()),
+          "sabalb z13.d, z20.s, z26.s");
+  COMPARE(sabalb(z13.VnH(), z20.VnB(), z26.VnB()),
+          "sabalb z13.h, z20.b, z26.b");
+  COMPARE(sabalb(z13.VnS(), z20.VnH(), z26.VnH()),
+          "sabalb z13.s, z20.h, z26.h");
+  COMPARE(sabalt(z14.VnD(), z19.VnS(), z10.VnS()),
+          "sabalt z14.d, z19.s, z10.s");
+  COMPARE(sabalt(z14.VnH(), z19.VnB(), z10.VnB()),
+          "sabalt z14.h, z19.b, z10.b");
+  COMPARE(sabalt(z14.VnS(), z19.VnH(), z10.VnH()),
+          "sabalt z14.s, z19.h, z10.h");
+  COMPARE(uabalb(z11.VnD(), z25.VnS(), z11.VnS()),
+          "uabalb z11.d, z25.s, z11.s");
+  COMPARE(uabalb(z11.VnH(), z25.VnB(), z11.VnB()),
+          "uabalb z11.h, z25.b, z11.b");
+  COMPARE(uabalb(z11.VnS(), z25.VnH(), z11.VnH()),
+          "uabalb z11.s, z25.h, z11.h");
+  COMPARE(uabalt(z4.VnD(), z2.VnS(), z31.VnS()), "uabalt z4.d, z2.s, z31.s");
+  COMPARE(uabalt(z4.VnH(), z2.VnB(), z31.VnB()), "uabalt z4.h, z2.b, z31.b");
+  COMPARE(uabalt(z4.VnS(), z2.VnH(), z31.VnH()), "uabalt z4.s, z2.h, z31.h");
+
+  COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()),
+                "sabalb z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()),
+                "sabalt z12.h, z3.b, z12.b");
+  COMPARE_MACRO(Sabalb(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()),
+                "sabalb z12.h, z12.b, z30.b");
+  COMPARE_MACRO(Sabalt(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), "");
+  COMPARE_MACRO(Sabalb(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()),
+                "movprfx z12, z13\n"
+                "sabalb z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()),
+                "movprfx z12, z3\n"
+                "sabalt z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Sabalb(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()),
+                "movprfx z12, z30\n"
+                "sabalb z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()),
+                "mov z12.d, z3.d");
+  COMPARE_MACRO(Sabalb(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()),
+                "mov z31.d, z12.d\n"
+                "movprfx z12, z3\n"
+                "sabalb z12.h, z31.b, z3.b");
+  COMPARE_MACRO(Sabalt(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()),
+                "mov z31.d, z12.d\n"
+                "movprfx z12, z3\n"
+                "sabalt z12.h, z3.b, z31.b");
+
+  COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z3.VnB(), z30.VnB()),
+                "uabalt z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z3.VnB(), z12.VnB()),
+                "uabalb z12.h, z3.b, z12.b");
+  COMPARE_MACRO(Uabalt(z12.VnH(), z12.VnH(), z12.VnB(), z30.VnB()),
+                "uabalt z12.h, z12.b, z30.b");
+  COMPARE_MACRO(Uabalb(z12.VnH(), z12.VnH(), z12.VnB(), z12.VnB()), "");
+  COMPARE_MACRO(Uabalt(z12.VnH(), z13.VnH(), z3.VnB(), z30.VnB()),
+                "movprfx z12, z13\n"
+                "uabalt z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z30.VnB()),
+                "movprfx z12, z3\n"
+                "uabalb z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Uabalt(z12.VnH(), z30.VnH(), z3.VnB(), z30.VnB()),
+                "movprfx z12, z30\n"
+                "uabalt z12.h, z3.b, z30.b");
+  COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z3.VnB()),
+                "mov z12.d, z3.d");
+  COMPARE_MACRO(Uabalt(z12.VnH(), z3.VnH(), z12.VnB(), z3.VnB()),
+                "mov z31.d, z12.d\n"
+                "movprfx z12, z3\n"
+                "uabalt z12.h, z31.b, z3.b");
+  COMPARE_MACRO(Uabalb(z12.VnH(), z3.VnH(), z3.VnB(), z12.VnB()),
+                "mov z31.d, z12.d\n"
+                "movprfx z12, z3\n"
+                "uabalb z12.h, z3.b, z31.b");
+  CLEANUP();
+}
+
+// Check disassembly of the add/subtract-with-carry instructions (adclb/t,
+// sbclb/t), including MacroAssembler four-operand forms that need a
+// movprfx or a scratch-register (z31) sequence when the destination
+// aliases a source.
+TEST(sve2_add_sub_carry) {
+  SETUP();
+
+  COMPARE(adclb(z25.VnS(), z17.VnS(), z24.VnS()), "adclb z25.s, z17.s, z24.s");
+  COMPARE(adclb(z25.VnD(), z17.VnD(), z24.VnD()), "adclb z25.d, z17.d, z24.d");
+  COMPARE(adclt(z0.VnS(), z2.VnS(), z15.VnS()), "adclt z0.s, z2.s, z15.s");
+  COMPARE(adclt(z0.VnD(), z2.VnD(), z15.VnD()), "adclt z0.d, z2.d, z15.d");
+  COMPARE(sbclb(z17.VnS(), z10.VnS(), z8.VnS()), "sbclb z17.s, z10.s, z8.s");
+  COMPARE(sbclb(z17.VnD(), z10.VnD(), z8.VnD()), "sbclb z17.d, z10.d, z8.d");
+  COMPARE(sbclt(z20.VnS(), z0.VnS(), z13.VnS()), "sbclt z20.s, z0.s, z13.s");
+  COMPARE(sbclt(z20.VnD(), z0.VnD(), z13.VnD()), "sbclt z20.d, z0.d, z13.d");
+
+  COMPARE_MACRO(Adclb(z25.VnS(), z25.VnS(), z17.VnS(), z24.VnS()),
+                "adclb z25.s, z17.s, z24.s");
+  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z17.VnS(), z24.VnS()),
+                "movprfx z25, z20\n"
+                "adclb z25.s, z17.s, z24.s");
+  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z24.VnS()),
+                "movprfx z31, z20\n"
+                "adclb z31.s, z25.s, z24.s\n"
+                "mov z25.d, z31.d");
+  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z24.VnS(), z25.VnS()),
+                "movprfx z31, z20\n"
+                "adclb z31.s, z24.s, z25.s\n"
+                "mov z25.d, z31.d");
+  COMPARE_MACRO(Adclb(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()),
+                "movprfx z31, z20\n"
+                "adclb z31.s, z25.s, z25.s\n"
+                "mov z25.d, z31.d");
+  COMPARE_MACRO(Adclt(z25.VnS(), z20.VnS(), z25.VnS(), z25.VnS()),
+                "movprfx z31, z20\n"
+                "adclt z31.s, z25.s, z25.s\n"
+                "mov z25.d, z31.d");
+
+  COMPARE_MACRO(Sbclb(z30.VnS(), z30.VnS(), z7.VnS(), z29.VnS()),
+                "sbclb z30.s, z7.s, z29.s");
+  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z7.VnS(), z29.VnS()),
+                "movprfx z30, z2\n"
+                "sbclb z30.s, z7.s, z29.s");
+  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z29.VnS()),
+                "movprfx z31, z2\n"
+                "sbclb z31.s, z30.s, z29.s\n"
+                "mov z30.d, z31.d");
+  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z29.VnS(), z30.VnS()),
+                "movprfx z31, z2\n"
+                "sbclb z31.s, z29.s, z30.s\n"
+                "mov z30.d, z31.d");
+  COMPARE_MACRO(Sbclb(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()),
+                "movprfx z31, z2\n"
+                "sbclb z31.s, z30.s, z30.s\n"
+                "mov z30.d, z31.d");
+  COMPARE_MACRO(Sbclt(z30.VnS(), z2.VnS(), z30.VnS(), z30.VnS()),
+                "movprfx z31, z2\n"
+                "sbclt z31.s, z30.s, z30.s\n"
+                "mov z30.d, z31.d");
+  CLEANUP();
+}
+
+// Check disassembly of the narrowing high-half add/subtract instructions
+// ((r)addhnb/t, (r)subhnb/t) for each narrowing size.
+TEST(sve2_add_sub_high) {
+  SETUP();
+
+  COMPARE_MACRO(Addhnb(z29.VnS(), z19.VnD(), z2.VnD()),
+                "addhnb z29.s, z19.d, z2.d");
+  COMPARE_MACRO(Addhnb(z29.VnB(), z19.VnH(), z2.VnH()),
+                "addhnb z29.b, z19.h, z2.h");
+  COMPARE_MACRO(Addhnb(z29.VnH(), z19.VnS(), z2.VnS()),
+                "addhnb z29.h, z19.s, z2.s");
+  COMPARE_MACRO(Addhnt(z8.VnS(), z12.VnD(), z6.VnD()),
+                "addhnt z8.s, z12.d, z6.d");
+  COMPARE_MACRO(Addhnt(z8.VnB(), z12.VnH(), z6.VnH()),
+                "addhnt z8.b, z12.h, z6.h");
+  COMPARE_MACRO(Addhnt(z8.VnH(), z12.VnS(), z6.VnS()),
+                "addhnt z8.h, z12.s, z6.s");
+  COMPARE_MACRO(Raddhnb(z0.VnS(), z11.VnD(), z10.VnD()),
+                "raddhnb z0.s, z11.d, z10.d");
+  COMPARE_MACRO(Raddhnb(z0.VnB(), z11.VnH(), z10.VnH()),
+                "raddhnb z0.b, z11.h, z10.h");
+  COMPARE_MACRO(Raddhnb(z0.VnH(), z11.VnS(), z10.VnS()),
+                "raddhnb z0.h, z11.s, z10.s");
+  COMPARE_MACRO(Raddhnt(z23.VnS(), z27.VnD(), z9.VnD()),
+                "raddhnt z23.s, z27.d, z9.d");
+  COMPARE_MACRO(Raddhnt(z23.VnB(), z27.VnH(), z9.VnH()),
+                "raddhnt z23.b, z27.h, z9.h");
+  COMPARE_MACRO(Raddhnt(z23.VnH(), z27.VnS(), z9.VnS()),
+                "raddhnt z23.h, z27.s, z9.s");
+  COMPARE_MACRO(Rsubhnb(z30.VnS(), z29.VnD(), z11.VnD()),
+                "rsubhnb z30.s, z29.d, z11.d");
+  COMPARE_MACRO(Rsubhnb(z30.VnB(), z29.VnH(), z11.VnH()),
+                "rsubhnb z30.b, z29.h, z11.h");
+  COMPARE_MACRO(Rsubhnb(z30.VnH(), z29.VnS(), z11.VnS()),
+                "rsubhnb z30.h, z29.s, z11.s");
+  COMPARE_MACRO(Rsubhnt(z25.VnS(), z7.VnD(), z18.VnD()),
+                "rsubhnt z25.s, z7.d, z18.d");
+  COMPARE_MACRO(Rsubhnt(z25.VnB(), z7.VnH(), z18.VnH()),
+                "rsubhnt z25.b, z7.h, z18.h");
+  COMPARE_MACRO(Rsubhnt(z25.VnH(), z7.VnS(), z18.VnS()),
+                "rsubhnt z25.h, z7.s, z18.s");
+  COMPARE_MACRO(Subhnb(z31.VnS(), z31.VnD(), z7.VnD()),
+                "subhnb z31.s, z31.d, z7.d");
+  COMPARE_MACRO(Subhnb(z31.VnB(), z31.VnH(), z7.VnH()),
+                "subhnb z31.b, z31.h, z7.h");
+  COMPARE_MACRO(Subhnb(z31.VnH(), z31.VnS(), z7.VnS()),
+                "subhnb z31.h, z31.s, z7.s");
+  COMPARE_MACRO(Subhnt(z31.VnS(), z22.VnD(), z27.VnD()),
+                "subhnt z31.s, z22.d, z27.d");
+  COMPARE_MACRO(Subhnt(z31.VnB(), z22.VnH(), z27.VnH()),
+                "subhnt z31.b, z22.h, z27.h");
+  COMPARE_MACRO(Subhnt(z31.VnH(), z22.VnS(), z27.VnS()),
+                "subhnt z31.h, z22.s, z27.s");
+
+  CLEANUP();
+}
+
+// Check disassembly of the complex addition instructions (cadd, sqcadd)
+// with rotations #90 and #270, including the MacroAssembler movprfx /
+// scratch-register (z31) sequences for aliasing operands.
+TEST(sve2_complex_addition) {
+  SETUP();
+
+  COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 90),
+                "cadd z5.b, z5.b, z12.b, #90");
+  COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 90),
+                "cadd z5.d, z5.d, z12.d, #90");
+  COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 90),
+                "cadd z5.h, z5.h, z12.h, #90");
+  COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 90),
+                "cadd z5.s, z5.s, z12.s, #90");
+  COMPARE_MACRO(Cadd(z5.VnB(), z5.VnB(), z12.VnB(), 270),
+                "cadd z5.b, z5.b, z12.b, #270");
+  COMPARE_MACRO(Cadd(z5.VnD(), z5.VnD(), z12.VnD(), 270),
+                "cadd z5.d, z5.d, z12.d, #270");
+  COMPARE_MACRO(Cadd(z5.VnH(), z5.VnH(), z12.VnH(), 270),
+                "cadd z5.h, z5.h, z12.h, #270");
+  COMPARE_MACRO(Cadd(z5.VnS(), z5.VnS(), z12.VnS(), 270),
+                "cadd z5.s, z5.s, z12.s, #270");
+  COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z12.VnS(), 270),
+                "movprfx z5, z6\n"
+                "cadd z5.s, z5.s, z12.s, #270");
+  COMPARE_MACRO(Cadd(z5.VnS(), z6.VnS(), z5.VnS(), 270),
+                "mov z31.d, z5.d\n"
+                "movprfx z5, z6\n"
+                "cadd z5.s, z5.s, z31.s, #270");
+
+  COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 90),
+                "sqcadd z20.b, z20.b, z23.b, #90");
+  COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 90),
+                "sqcadd z20.d, z20.d, z23.d, #90");
+  COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 90),
+                "sqcadd z20.h, z20.h, z23.h, #90");
+  COMPARE_MACRO(Sqcadd(z20.VnB(), z20.VnB(), z23.VnB(), 270),
+                "sqcadd z20.b, z20.b, z23.b, #270");
+  COMPARE_MACRO(Sqcadd(z20.VnD(), z20.VnD(), z23.VnD(), 270),
+                "sqcadd z20.d, z20.d, z23.d, #270");
+  COMPARE_MACRO(Sqcadd(z20.VnH(), z20.VnH(), z23.VnH(), 270),
+                "sqcadd z20.h, z20.h, z23.h, #270");
+  COMPARE_MACRO(Sqcadd(z20.VnS(), z20.VnS(), z23.VnS(), 270),
+                "sqcadd z20.s, z20.s, z23.s, #270");
+  COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z23.VnH(), 270),
+                "movprfx z20, z21\n"
+                "sqcadd z20.h, z20.h, z23.h, #270");
+  COMPARE_MACRO(Sqcadd(z20.VnH(), z21.VnH(), z20.VnH(), 270),
+                "mov z31.d, z20.d\n"
+                "movprfx z20, z21\n"
+                "sqcadd z20.h, z20.h, z31.h, #270");
+
+  CLEANUP();
+}
+
+// Check disassembly of the bit-permute instructions (bdep, bext, bgrp)
+// for all four element sizes.
+TEST(sve2_bit_permute) {
+  SETUP();
+
+  COMPARE_MACRO(Bdep(z18.VnB(), z10.VnB(), z0.VnB()),
+                "bdep z18.b, z10.b, z0.b");
+  COMPARE_MACRO(Bdep(z18.VnD(), z10.VnD(), z0.VnD()),
+                "bdep z18.d, z10.d, z0.d");
+  COMPARE_MACRO(Bdep(z18.VnH(), z10.VnH(), z0.VnH()),
+                "bdep z18.h, z10.h, z0.h");
+  COMPARE_MACRO(Bdep(z18.VnS(), z10.VnS(), z0.VnS()),
+                "bdep z18.s, z10.s, z0.s");
+  COMPARE_MACRO(Bext(z6.VnB(), z2.VnB(), z5.VnB()), "bext z6.b, z2.b, z5.b");
+  COMPARE_MACRO(Bext(z6.VnD(), z2.VnD(), z5.VnD()), "bext z6.d, z2.d, z5.d");
+  COMPARE_MACRO(Bext(z6.VnH(), z2.VnH(), z5.VnH()), "bext z6.h, z2.h, z5.h");
+  COMPARE_MACRO(Bext(z6.VnS(), z2.VnS(), z5.VnS()), "bext z6.s, z2.s, z5.s");
+  COMPARE_MACRO(Bgrp(z24.VnB(), z9.VnB(), z5.VnB()), "bgrp z24.b, z9.b, z5.b");
+  COMPARE_MACRO(Bgrp(z24.VnD(), z9.VnD(), z5.VnD()), "bgrp z24.d, z9.d, z5.d");
+  COMPARE_MACRO(Bgrp(z24.VnH(), z9.VnH(), z5.VnH()), "bgrp z24.h, z9.h, z5.h");
+  COMPARE_MACRO(Bgrp(z24.VnS(), z9.VnS(), z5.VnS()), "bgrp z24.s, z9.s, z5.s");
+
+  CLEANUP();
+}
+
+// Check disassembly of the widening multiplies: sqdmullb/t (vector and
+// indexed), pmullb/t, smullb/t and umullb/t. The quadword pmullb form is
+// commented out because the SVEPmull128 feature is not supported here.
+TEST(sve2_integer_multiply_long_vector) {
+  SETUP();
+
+  COMPARE(sqdmullb(z1.VnD(), z31.VnS(), z21.VnS()),
+          "sqdmullb z1.d, z31.s, z21.s");
+  COMPARE(sqdmullb(z2.VnH(), z30.VnB(), z22.VnB()),
+          "sqdmullb z2.h, z30.b, z22.b");
+  COMPARE(sqdmullb(z3.VnS(), z29.VnH(), z23.VnH()),
+          "sqdmullb z3.s, z29.h, z23.h");
+  COMPARE(sqdmullb(z1.VnS(), z27.VnH(), z3.VnH(), 7),
+          "sqdmullb z1.s, z27.h, z3.h[7]");
+  COMPARE(sqdmullb(z27.VnD(), z16.VnS(), z5.VnS(), 3),
+          "sqdmullb z27.d, z16.s, z5.s[3]");
+
+  COMPARE(sqdmullt(z2.VnD(), z1.VnS(), z5.VnS()), "sqdmullt z2.d, z1.s, z5.s");
+  COMPARE(sqdmullt(z12.VnH(), z11.VnB(), z15.VnB()),
+          "sqdmullt z12.h, z11.b, z15.b");
+  COMPARE(sqdmullt(z20.VnS(), z21.VnH(), z25.VnH()),
+          "sqdmullt z20.s, z21.h, z25.h");
+  COMPARE(sqdmullt(z23.VnS(), z28.VnH(), z2.VnH(), 0),
+          "sqdmullt z23.s, z28.h, z2.h[0]");
+  COMPARE(sqdmullt(z7.VnD(), z4.VnS(), z0.VnS(), 0),
+          "sqdmullt z7.d, z4.s, z0.s[0]");
+
+  // Feature `SVEPmull128` is not supported.
+  // COMPARE(pmullb(z12.VnQ(), z21.VnD(), z12.VnD()),
+  //         "pmullb z12.q, z21.d, z12.d");
+  COMPARE(pmullb(z12.VnH(), z21.VnB(), z12.VnB()),
+          "pmullb z12.h, z21.b, z12.b");
+  COMPARE(pmullt(z31.VnD(), z30.VnS(), z26.VnS()),
+          "pmullt z31.d, z30.s, z26.s");
+
+  COMPARE(smullb(z10.VnD(), z4.VnS(), z4.VnS()), "smullb z10.d, z4.s, z4.s");
+  COMPARE(smullb(z11.VnH(), z14.VnB(), z14.VnB()),
+          "smullb z11.h, z14.b, z14.b");
+  COMPARE(smullb(z12.VnS(), z24.VnH(), z24.VnH()),
+          "smullb z12.s, z24.h, z24.h");
+
+  COMPARE(smullt(z31.VnD(), z26.VnS(), z5.VnS()), "smullt z31.d, z26.s, z5.s");
+  COMPARE(smullt(z21.VnH(), z16.VnB(), z5.VnB()), "smullt z21.h, z16.b, z5.b");
+  COMPARE(smullt(z11.VnS(), z6.VnH(), z5.VnH()), "smullt z11.s, z6.h, z5.h");
+
+  COMPARE(umullb(z12.VnD(), z5.VnS(), z2.VnS()), "umullb z12.d, z5.s, z2.s");
+  COMPARE(umullb(z12.VnH(), z15.VnB(), z12.VnB()),
+          "umullb z12.h, z15.b, z12.b");
+  COMPARE(umullb(z12.VnS(), z25.VnH(), z22.VnH()),
+          "umullb z12.s, z25.h, z22.h");
+
+  COMPARE(umullt(z24.VnD(), z6.VnS(), z6.VnS()), "umullt z24.d, z6.s, z6.s");
+  COMPARE(umullt(z24.VnH(), z7.VnB(), z16.VnB()), "umullt z24.h, z7.b, z16.b");
+  COMPARE(umullt(z24.VnS(), z8.VnH(), z26.VnH()), "umullt z24.s, z8.h, z26.h");
+
+  CLEANUP();
+}
+
+// Checks disassembly of XAR (exclusive-OR and rotate right by immediate) for
+// every lane size, at the minimum (#1) and maximum (#lane-size) rotations,
+// plus macro-assembler handling of destructive-operand aliasing.
+TEST(sve2_xar) {
+ SETUP();
+
+ COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 1),
+ "xar z16.b, z16.b, z13.b, #1");
+ COMPARE_MACRO(Xar(z16.VnB(), z16.VnB(), z13.VnB(), 8),
+ "xar z16.b, z16.b, z13.b, #8");
+ COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 1),
+ "xar z16.h, z16.h, z13.h, #1");
+ COMPARE_MACRO(Xar(z16.VnH(), z16.VnH(), z13.VnH(), 16),
+ "xar z16.h, z16.h, z13.h, #16");
+ COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 1),
+ "xar z16.s, z16.s, z13.s, #1");
+ COMPARE_MACRO(Xar(z16.VnS(), z16.VnS(), z13.VnS(), 32),
+ "xar z16.s, z16.s, z13.s, #32");
+ COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 1),
+ "xar z16.d, z16.d, z13.d, #1");
+ COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z13.VnD(), 64),
+ "xar z16.d, z16.d, z13.d, #64");
+
+ // When zd aliases zm, the macro swaps the sources (EOR is commutative), so
+ // no extra move is needed; a non-aliasing zn gets a movprfx instead.
+ COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z16.VnD(), 64),
+ "xar z16.d, z16.d, z13.d, #64");
+ COMPARE_MACRO(Xar(z16.VnD(), z13.VnD(), z12.VnD(), 64),
+ "movprfx z16, z13\n"
+ "xar z16.d, z16.d, z12.d, #64");
+ COMPARE_MACRO(Xar(z16.VnD(), z16.VnD(), z16.VnD(), 64),
+ "xar z16.d, z16.d, z16.d, #64");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the SVE2 histogram instructions: predicated histcnt
+// (S and D lanes only) and unpredicated histseg (B lanes only).
+TEST(sve2_histogram) {
+ SETUP();
+
+ COMPARE_MACRO(Histcnt(z24.VnS(), p6.Zeroing(), z3.VnS(), z10.VnS()),
+ "histcnt z24.s, p6/z, z3.s, z10.s");
+ COMPARE_MACRO(Histcnt(z24.VnD(), p6.Zeroing(), z3.VnD(), z10.VnD()),
+ "histcnt z24.d, p6/z, z3.d, z10.d");
+ COMPARE_MACRO(Histseg(z22.VnB(), z14.VnB(), z8.VnB()),
+ "histseg z22.b, z14.b, z8.b");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the SVE2 two-register TBL (table lookup over a pair
+// of consecutive vectors, with wrap-around as in the z31/z0 case) and TBX
+// (table lookup with fall-back to the destination).
+TEST(sve2_table) {
+ SETUP();
+
+ // NOTE(review): the next three cases pass z1/z2 as the table but expect
+ // "{z3.*, z4.*}" in the output, unlike the z31/z0 case below which echoes
+ // its inputs — verify this offset is intentional (e.g. introduced by
+ // COMPARE_MACRO scratch-register handling) and not a typo.
+ COMPARE_MACRO(Tbl(z17.VnB(), z1.VnB(), z2.VnB(), z22.VnB()),
+ "tbl z17.b, {z3.b, z4.b}, z22.b");
+ COMPARE_MACRO(Tbl(z17.VnD(), z1.VnD(), z2.VnD(), z22.VnD()),
+ "tbl z17.d, {z3.d, z4.d}, z22.d");
+ COMPARE_MACRO(Tbl(z17.VnH(), z1.VnH(), z2.VnH(), z22.VnH()),
+ "tbl z17.h, {z3.h, z4.h}, z22.h");
+ COMPARE_MACRO(Tbl(z17.VnS(), z31.VnS(), z0.VnS(), z22.VnS()),
+ "tbl z17.s, {z31.s, z0.s}, z22.s");
+ COMPARE_MACRO(Tbx(z22.VnB(), z15.VnB(), z19.VnB()),
+ "tbx z22.b, z15.b, z19.b");
+ COMPARE_MACRO(Tbx(z22.VnD(), z15.VnD(), z19.VnD()),
+ "tbx z22.d, z15.d, z19.d");
+ COMPARE_MACRO(Tbx(z22.VnH(), z15.VnH(), z19.VnH()),
+ "tbx z22.h, z15.h, z19.h");
+ COMPARE_MACRO(Tbx(z22.VnS(), z15.VnS(), z19.VnS()),
+ "tbx z22.s, z15.s, z19.s");
+
+ CLEANUP();
+}
+
+// Checks disassembly of CDOT (complex integer dot product): vector and
+// indexed forms, all four rotations (#0/#90/#180/#270), and the
+// macro-assembler movprfx sequences when the accumulator differs from the
+// destination or aliases a source (z31 is used as scratch).
+TEST(sve2_cdot) {
+ SETUP();
+
+ COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 0),
+ "cdot z7.s, z4.b, z10.b, #0");
+ COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 0),
+ "cdot z7.d, z4.h, z10.h, #0");
+ COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 90),
+ "cdot z7.s, z4.b, z10.b, #90");
+ COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 90),
+ "cdot z7.d, z4.h, z10.h, #90");
+ COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 180),
+ "cdot z7.s, z4.b, z10.b, #180");
+ COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 180),
+ "cdot z7.d, z4.h, z10.h, #180");
+ COMPARE_MACRO(Cdot(z7.VnS(), z7.VnS(), z4.VnB(), z10.VnB(), 270),
+ "cdot z7.s, z4.b, z10.b, #270");
+ COMPARE_MACRO(Cdot(z7.VnD(), z7.VnD(), z4.VnH(), z10.VnH(), 270),
+ "cdot z7.d, z4.h, z10.h, #270");
+
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0),
+ "movprfx z0, z1\n"
+ "cdot z0.s, z2.b, z3.b, #0");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 0),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "cdot z0.s, z31.b, z3.b, #0");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 0),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "cdot z0.s, z2.b, z31.b, #0");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 0),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "cdot z0.s, z31.b, z31.b, #0");
+
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 0, 0),
+ "cdot z18.s, z26.b, z7.b[0], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 1, 0),
+ "cdot z18.s, z26.b, z7.b[1], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 0),
+ "cdot z18.s, z26.b, z7.b[2], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 3, 0),
+ "cdot z18.s, z26.b, z7.b[3], #0");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 90),
+ "cdot z18.s, z26.b, z7.b[2], #90");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 180),
+ "cdot z18.s, z26.b, z7.b[2], #180");
+ COMPARE_MACRO(Cdot(z18.VnS(), z18.VnS(), z26.VnB(), z7.VnB(), 2, 270),
+ "cdot z18.s, z26.b, z7.b[2], #270");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 0, 0),
+ "cdot z5.d, z7.h, z1.h[0], #0");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 0),
+ "cdot z5.d, z7.h, z1.h[1], #0");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 90),
+ "cdot z5.d, z7.h, z1.h[1], #90");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 180),
+ "cdot z5.d, z7.h, z1.h[1], #180");
+ COMPARE_MACRO(Cdot(z5.VnD(), z5.VnD(), z7.VnH(), z1.VnH(), 1, 270),
+ "cdot z5.d, z7.h, z1.h[1], #270");
+
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z3.VnB(), 0, 0),
+ "movprfx z0, z1\n"
+ "cdot z0.s, z2.b, z3.b[0], #0");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB(), 1, 90),
+ "movprfx z31, z1\n"
+ "cdot z31.s, z0.b, z3.b[1], #90\n"
+ "mov z0.d, z31.d");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB(), 2, 180),
+ "movprfx z31, z1\n"
+ "cdot z31.s, z2.b, z0.b[2], #180\n"
+ "mov z0.d, z31.d");
+ COMPARE_MACRO(Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB(), 3, 270),
+ "movprfx z31, z1\n"
+ "cdot z31.s, z0.b, z0.b[3], #270\n"
+ "mov z0.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of SVE2 non-temporal gather loads (vector base plus
+// scalar offset addressing), for both S-element and D-element forms,
+// including the sign-extending variants.
+TEST(sve2_ldnt1) {
+ SETUP();
+
+ COMPARE_MACRO(Ldnt1b(z24.VnS(), p4.Zeroing(), SVEMemOperand(z18.VnS(), x13)),
+ "ldnt1b {z24.s}, p4/z, [z18.s, x13]");
+ COMPARE_MACRO(Ldnt1h(z3.VnS(), p4.Zeroing(), SVEMemOperand(z15.VnS(), x14)),
+ "ldnt1h {z3.s}, p4/z, [z15.s, x14]");
+ COMPARE_MACRO(Ldnt1sb(z7.VnS(), p3.Zeroing(), SVEMemOperand(z18.VnS(), x11)),
+ "ldnt1sb {z7.s}, p3/z, [z18.s, x11]");
+ COMPARE_MACRO(Ldnt1sh(z17.VnS(), p5.Zeroing(), SVEMemOperand(z31.VnS(), x19)),
+ "ldnt1sh {z17.s}, p5/z, [z31.s, x19]");
+ COMPARE_MACRO(Ldnt1w(z18.VnS(), p5.Zeroing(), SVEMemOperand(z9.VnS(), x17)),
+ "ldnt1w {z18.s}, p5/z, [z9.s, x17]");
+
+ COMPARE_MACRO(Ldnt1b(z27.VnD(), p4.Zeroing(), SVEMemOperand(z27.VnD(), x24)),
+ "ldnt1b {z27.d}, p4/z, [z27.d, x24]");
+ COMPARE_MACRO(Ldnt1d(z25.VnD(), p0.Zeroing(), SVEMemOperand(z10.VnD(), x0)),
+ "ldnt1d {z25.d}, p0/z, [z10.d, x0]");
+ COMPARE_MACRO(Ldnt1h(z16.VnD(), p2.Zeroing(), SVEMemOperand(z10.VnD(), x9)),
+ "ldnt1h {z16.d}, p2/z, [z10.d, x9]");
+ COMPARE_MACRO(Ldnt1sb(z25.VnD(), p0.Zeroing(), SVEMemOperand(z0.VnD(), x3)),
+ "ldnt1sb {z25.d}, p0/z, [z0.d, x3]");
+ COMPARE_MACRO(Ldnt1sh(z4.VnD(), p1.Zeroing(), SVEMemOperand(z31.VnD(), x4)),
+ "ldnt1sh {z4.d}, p1/z, [z31.d, x4]");
+ COMPARE_MACRO(Ldnt1sw(z3.VnD(), p7.Zeroing(), SVEMemOperand(z1.VnD(), x10)),
+ "ldnt1sw {z3.d}, p7/z, [z1.d, x10]");
+ COMPARE_MACRO(Ldnt1w(z17.VnD(), p5.Zeroing(), SVEMemOperand(z8.VnD(), x12)),
+ "ldnt1w {z17.d}, p5/z, [z8.d, x12]");
+
+ CLEANUP();
+}
+
+// Checks disassembly of SVE2 non-temporal scatter stores (vector base plus
+// scalar offset addressing) for S-element and D-element forms.
+TEST(sve2_stnt1) {
+ SETUP();
+
+ COMPARE_MACRO(Stnt1b(z29.VnD(), p7, SVEMemOperand(z29.VnD(), x21)),
+ "stnt1b {z29.d}, p7, [z29.d, x21]");
+ COMPARE_MACRO(Stnt1d(z19.VnD(), p4, SVEMemOperand(z3.VnD(), x16)),
+ "stnt1d {z19.d}, p4, [z3.d, x16]");
+ COMPARE_MACRO(Stnt1h(z11.VnS(), p3, SVEMemOperand(z2.VnS(), x16)),
+ "stnt1h {z11.s}, p3, [z2.s, x16]");
+ COMPARE_MACRO(Stnt1h(z3.VnD(), p3, SVEMemOperand(z10.VnD(), x16)),
+ "stnt1h {z3.d}, p3, [z10.d, x16]");
+ COMPARE_MACRO(Stnt1w(z11.VnS(), p4, SVEMemOperand(z14.VnS(), x15)),
+ "stnt1w {z11.s}, p4, [z14.s, x15]");
+ COMPARE_MACRO(Stnt1w(z7.VnD(), p0, SVEMemOperand(z11.VnD(), x10)),
+ "stnt1w {z7.d}, p0, [z11.d, x10]");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the SVE2 bitwise ternary operations (bcax, bsl,
+// bsl1n, bsl2n, eor3, nbsl — all D-lane encodings) and the macro-assembler
+// movprfx / z31-scratch sequences used when the destination is not the
+// first source or aliases another source.
+TEST(sve2_bitwise_ternary) {
+ SETUP();
+
+ COMPARE_MACRO(Bcax(z6.VnD(), z6.VnD(), z12.VnD(), z1.VnD()),
+ "bcax z6.d, z6.d, z12.d, z1.d");
+ COMPARE_MACRO(Bsl(z21.VnD(), z21.VnD(), z2.VnD(), z2.VnD()),
+ "bsl z21.d, z21.d, z2.d, z2.d");
+ COMPARE_MACRO(Bsl1n(z18.VnD(), z18.VnD(), z8.VnD(), z7.VnD()),
+ "bsl1n z18.d, z18.d, z8.d, z7.d");
+ COMPARE_MACRO(Bsl2n(z7.VnD(), z7.VnD(), z3.VnD(), z19.VnD()),
+ "bsl2n z7.d, z7.d, z3.d, z19.d");
+ COMPARE_MACRO(Eor3(z10.VnD(), z10.VnD(), z24.VnD(), z23.VnD()),
+ "eor3 z10.d, z10.d, z24.d, z23.d");
+ COMPARE_MACRO(Nbsl(z17.VnD(), z17.VnD(), z21.VnD(), z27.VnD()),
+ "nbsl z17.d, z17.d, z21.d, z27.d");
+
+ COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z27.VnD()),
+ "movprfx z17, z18\n"
+ "nbsl z17.d, z17.d, z21.d, z27.d");
+ COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z27.VnD()),
+ "movprfx z31, z18\n"
+ "nbsl z31.d, z31.d, z17.d, z27.d\n"
+ "mov z17.d, z31.d");
+ COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z21.VnD(), z17.VnD()),
+ "movprfx z31, z18\n"
+ "nbsl z31.d, z31.d, z21.d, z17.d\n"
+ "mov z17.d, z31.d");
+ COMPARE_MACRO(Nbsl(z17.VnD(), z18.VnD(), z17.VnD(), z17.VnD()),
+ "movprfx z31, z18\n"
+ "nbsl z31.d, z31.d, z17.d, z17.d\n"
+ "mov z17.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the SVE2 while-style scalar comparisons: the
+// decrementing whilege/whilegt/whilehi/whilehs (W and X register forms,
+// all predicate lane sizes) and the pointer-conflict tests
+// whilerw/whilewr (X registers only).
+TEST(sve2_int_compare_scalars) {
+ SETUP();
+
+ COMPARE_MACRO(Whilege(p0.VnB(), w20, w29), "whilege p0.b, w20, w29");
+ COMPARE_MACRO(Whilege(p0.VnB(), x20, x29), "whilege p0.b, x20, x29");
+ COMPARE_MACRO(Whilege(p0.VnD(), w20, w29), "whilege p0.d, w20, w29");
+ COMPARE_MACRO(Whilege(p0.VnD(), x20, x29), "whilege p0.d, x20, x29");
+ COMPARE_MACRO(Whilege(p0.VnH(), w20, w29), "whilege p0.h, w20, w29");
+ COMPARE_MACRO(Whilege(p0.VnH(), x20, x29), "whilege p0.h, x20, x29");
+ COMPARE_MACRO(Whilege(p0.VnS(), w20, w29), "whilege p0.s, w20, w29");
+ COMPARE_MACRO(Whilege(p0.VnS(), x20, x29), "whilege p0.s, x20, x29");
+ COMPARE_MACRO(Whilegt(p11.VnB(), w24, w3), "whilegt p11.b, w24, w3");
+ COMPARE_MACRO(Whilegt(p11.VnD(), w24, w3), "whilegt p11.d, w24, w3");
+ COMPARE_MACRO(Whilegt(p11.VnH(), x24, x3), "whilegt p11.h, x24, x3");
+ COMPARE_MACRO(Whilegt(p11.VnS(), x24, x3), "whilegt p11.s, x24, x3");
+ COMPARE_MACRO(Whilehi(p2.VnB(), x20, x8), "whilehi p2.b, x20, x8");
+ COMPARE_MACRO(Whilehi(p2.VnD(), x20, x8), "whilehi p2.d, x20, x8");
+ COMPARE_MACRO(Whilehi(p2.VnH(), w20, w8), "whilehi p2.h, w20, w8");
+ COMPARE_MACRO(Whilehi(p2.VnS(), w20, w8), "whilehi p2.s, w20, w8");
+ COMPARE_MACRO(Whilehs(p4.VnB(), w22, w9), "whilehs p4.b, w22, w9");
+ COMPARE_MACRO(Whilehs(p4.VnD(), x22, x9), "whilehs p4.d, x22, x9");
+ COMPARE_MACRO(Whilehs(p4.VnH(), w22, w9), "whilehs p4.h, w22, w9");
+ COMPARE_MACRO(Whilehs(p4.VnS(), x22, x9), "whilehs p4.s, x22, x9");
+
+ COMPARE_MACRO(Whilerw(p7.VnB(), x25, x27), "whilerw p7.b, x25, x27");
+ COMPARE_MACRO(Whilerw(p7.VnD(), x25, x28), "whilerw p7.d, x25, x28");
+ COMPARE_MACRO(Whilerw(p7.VnH(), x25, x29), "whilerw p7.h, x25, x29");
+ COMPARE_MACRO(Whilerw(p7.VnS(), x25, x30), "whilerw p7.s, x25, x30");
+ COMPARE_MACRO(Whilerw(p7.VnS(), x25, xzr), "whilerw p7.s, x25, xzr");
+ COMPARE_MACRO(Whilewr(p8.VnB(), x14, x14), "whilewr p8.b, x14, x14");
+ COMPARE_MACRO(Whilewr(p8.VnD(), x14, x13), "whilewr p8.d, x14, x13");
+ COMPARE_MACRO(Whilewr(p8.VnH(), x14, x12), "whilewr p8.h, x14, x12");
+ COMPARE_MACRO(Whilewr(p8.VnS(), x14, x11), "whilewr p8.s, x14, x11");
+ COMPARE_MACRO(Whilewr(p8.VnS(), xzr, x11), "whilewr p8.s, xzr, x11");
+
+ CLEANUP();
+}
+
+// Checks disassembly of SPLICE: the SVE2 constructive two-register form
+// (printed with braces) and the fall-back to the destructive SVE1 form when
+// the destination aliases the first source.
+TEST(sve2_splice) {
+ SETUP();
+
+ COMPARE_MACRO(Splice(z31.VnB(), p0, z21.VnB(), z22.VnB()),
+ "splice z31.b, p0, {z21.b, z22.b}");
+ COMPARE_MACRO(Splice(z31.VnD(), p0, z21.VnD(), z22.VnD()),
+ "splice z31.d, p0, {z21.d, z22.d}");
+ COMPARE_MACRO(Splice(z31.VnH(), p0, z21.VnH(), z22.VnH()),
+ "splice z31.h, p0, {z21.h, z22.h}");
+ COMPARE_MACRO(Splice(z31.VnS(), p0, z31.VnS(), z0.VnS()),
+ "splice z31.s, p0, z31.s, z0.s");
+ COMPARE_MACRO(Splice(z30.VnS(), p0, z31.VnS(), z0.VnS()),
+ "splice z30.s, p0, {z31.s, z0.s}");
+
+ CLEANUP();
+}
+
+// Checks disassembly of indexed MUL, exercising the per-size index limits
+// (H: 0-7, S: 0-3, D: 0-1).
+TEST(sve2_mul_index) {
+ SETUP();
+
+ COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z7.VnH(), 0),
+ "mul z18.h, z5.h, z7.h[0]");
+ COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 2),
+ "mul z18.h, z5.h, z2.h[2]");
+ COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 6),
+ "mul z18.h, z5.h, z2.h[6]");
+ COMPARE_MACRO(Mul(z18.VnH(), z5.VnH(), z2.VnH(), 7),
+ "mul z18.h, z5.h, z2.h[7]");
+ COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z7.VnS(), 0),
+ "mul z8.s, z15.s, z7.s[0]");
+ COMPARE_MACRO(Mul(z8.VnS(), z15.VnS(), z0.VnS(), 3),
+ "mul z8.s, z15.s, z0.s[3]");
+ COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z15.VnD(), 0),
+ "mul z8.d, z15.d, z15.d[0]");
+ COMPARE_MACRO(Mul(z8.VnD(), z15.VnD(), z0.VnD(), 1),
+ "mul z8.d, z15.d, z0.d[1]");
+
+ CLEANUP();
+}
+
+// Checks disassembly of indexed MLA and MLS (multiply-accumulate /
+// multiply-subtract), covering the per-size index ranges and the
+// macro-assembler movprfx / z31-scratch sequences when the accumulator
+// differs from the destination or aliases a source.
+TEST(sve2_mla_mls_index) {
+ SETUP();
+
+ COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0),
+ "mla z1.h, z9.h, z0.h[0]");
+ COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2),
+ "mla z1.h, z9.h, z1.h[2]");
+ COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6),
+ "mla z1.h, z9.h, z2.h[6]");
+ COMPARE_MACRO(Mla(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7),
+ "mla z1.h, z9.h, z3.h[7]");
+ COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0),
+ "mla z10.s, z22.s, z7.s[0]");
+ COMPARE_MACRO(Mla(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3),
+ "mla z10.s, z22.s, z0.s[3]");
+ COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0),
+ "mla z4.d, z0.d, z15.d[0]");
+ COMPARE_MACRO(Mla(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1),
+ "mla z4.d, z0.d, z0.d[1]");
+
+ COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z1.VnH(), 0),
+ "movprfx z4, z5\n"
+ "mla z4.h, z0.h, z1.h[0]");
+ COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z1.VnH(), 0),
+ "movprfx z31, z5\n"
+ "mla z31.h, z4.h, z1.h[0]\n"
+ "mov z4.d, z31.d");
+ COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z0.VnH(), z4.VnH(), 0),
+ "movprfx z31, z5\n"
+ "mla z31.h, z0.h, z4.h[0]\n"
+ "mov z4.d, z31.d");
+ COMPARE_MACRO(Mla(z4.VnH(), z5.VnH(), z4.VnH(), z4.VnH(), 0),
+ "movprfx z31, z5\n"
+ "mla z31.h, z4.h, z4.h[0]\n"
+ "mov z4.d, z31.d");
+
+ COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z0.VnH(), 0),
+ "mls z1.h, z9.h, z0.h[0]");
+ COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z1.VnH(), 2),
+ "mls z1.h, z9.h, z1.h[2]");
+ COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z2.VnH(), 6),
+ "mls z1.h, z9.h, z2.h[6]");
+ COMPARE_MACRO(Mls(z1.VnH(), z1.VnH(), z9.VnH(), z3.VnH(), 7),
+ "mls z1.h, z9.h, z3.h[7]");
+ COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z7.VnS(), 0),
+ "mls z10.s, z22.s, z7.s[0]");
+ COMPARE_MACRO(Mls(z10.VnS(), z10.VnS(), z22.VnS(), z0.VnS(), 3),
+ "mls z10.s, z22.s, z0.s[3]");
+ COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z15.VnD(), 0),
+ "mls z4.d, z0.d, z15.d[0]");
+ COMPARE_MACRO(Mls(z4.VnD(), z4.VnD(), z0.VnD(), z0.VnD(), 1),
+ "mls z4.d, z0.d, z0.d[1]");
+
+ COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z1.VnS(), 0),
+ "movprfx z4, z5\n"
+ "mls z4.s, z0.s, z1.s[0]");
+ COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z1.VnS(), 0),
+ "movprfx z31, z5\n"
+ "mls z31.s, z4.s, z1.s[0]\n"
+ "mov z4.d, z31.d");
+ COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z0.VnS(), z4.VnS(), 0),
+ "movprfx z31, z5\n"
+ "mls z31.s, z0.s, z4.s[0]\n"
+ "mov z4.d, z31.d");
+ COMPARE_MACRO(Mls(z4.VnS(), z5.VnS(), z4.VnS(), z4.VnS(), 0),
+ "movprfx z31, z5\n"
+ "mls z31.s, z4.s, z4.s[0]\n"
+ "mov z4.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the widening multiply-accumulate family
+// (smlalb/t, smlslb/t, umlalb/t, umlslb/t) at every lane-size pairing,
+// plus the macro-assembler movprfx / z31-scratch sequences for
+// destination/source aliasing.
+TEST(sve2_mla_long) {
+ SETUP();
+
+ COMPARE_MACRO(Smlalb(z1.VnD(), z1.VnD(), z3.VnS(), z23.VnS()),
+ "smlalb z1.d, z3.s, z23.s");
+ COMPARE_MACRO(Smlalb(z1.VnH(), z1.VnH(), z3.VnB(), z23.VnB()),
+ "smlalb z1.h, z3.b, z23.b");
+ COMPARE_MACRO(Smlalb(z1.VnS(), z1.VnS(), z3.VnH(), z23.VnH()),
+ "smlalb z1.s, z3.h, z23.h");
+ COMPARE_MACRO(Smlalt(z31.VnD(), z31.VnD(), z24.VnS(), z29.VnS()),
+ "smlalt z31.d, z24.s, z29.s");
+ COMPARE_MACRO(Smlalt(z31.VnH(), z31.VnH(), z24.VnB(), z29.VnB()),
+ "smlalt z31.h, z24.b, z29.b");
+ COMPARE_MACRO(Smlalt(z31.VnS(), z31.VnS(), z24.VnH(), z29.VnH()),
+ "smlalt z31.s, z24.h, z29.h");
+ COMPARE_MACRO(Smlslb(z5.VnD(), z5.VnD(), z26.VnS(), z27.VnS()),
+ "smlslb z5.d, z26.s, z27.s");
+ COMPARE_MACRO(Smlslb(z5.VnH(), z5.VnH(), z26.VnB(), z27.VnB()),
+ "smlslb z5.h, z26.b, z27.b");
+ COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z26.VnH(), z27.VnH()),
+ "smlslb z5.s, z26.h, z27.h");
+ COMPARE_MACRO(Smlslt(z23.VnD(), z23.VnD(), z24.VnS(), z25.VnS()),
+ "smlslt z23.d, z24.s, z25.s");
+ COMPARE_MACRO(Smlslt(z23.VnH(), z23.VnH(), z24.VnB(), z25.VnB()),
+ "smlslt z23.h, z24.b, z25.b");
+ COMPARE_MACRO(Smlslt(z23.VnS(), z23.VnS(), z24.VnH(), z25.VnH()),
+ "smlslt z23.s, z24.h, z25.h");
+ COMPARE_MACRO(Umlalb(z31.VnD(), z31.VnD(), z9.VnS(), z21.VnS()),
+ "umlalb z31.d, z9.s, z21.s");
+ COMPARE_MACRO(Umlalb(z31.VnH(), z31.VnH(), z9.VnB(), z21.VnB()),
+ "umlalb z31.h, z9.b, z21.b");
+ COMPARE_MACRO(Umlalb(z31.VnS(), z31.VnS(), z9.VnH(), z21.VnH()),
+ "umlalb z31.s, z9.h, z21.h");
+ COMPARE_MACRO(Umlalt(z11.VnD(), z11.VnD(), z5.VnS(), z22.VnS()),
+ "umlalt z11.d, z5.s, z22.s");
+ COMPARE_MACRO(Umlalt(z11.VnH(), z11.VnH(), z5.VnB(), z22.VnB()),
+ "umlalt z11.h, z5.b, z22.b");
+ COMPARE_MACRO(Umlalt(z11.VnS(), z11.VnS(), z5.VnH(), z22.VnH()),
+ "umlalt z11.s, z5.h, z22.h");
+ COMPARE_MACRO(Umlslb(z28.VnD(), z28.VnD(), z13.VnS(), z9.VnS()),
+ "umlslb z28.d, z13.s, z9.s");
+ COMPARE_MACRO(Umlslb(z28.VnH(), z28.VnH(), z13.VnB(), z9.VnB()),
+ "umlslb z28.h, z13.b, z9.b");
+ COMPARE_MACRO(Umlslb(z28.VnS(), z28.VnS(), z13.VnH(), z9.VnH()),
+ "umlslb z28.s, z13.h, z9.h");
+ COMPARE_MACRO(Umlslt(z9.VnD(), z9.VnD(), z12.VnS(), z30.VnS()),
+ "umlslt z9.d, z12.s, z30.s");
+ COMPARE_MACRO(Umlslt(z9.VnH(), z9.VnH(), z12.VnB(), z30.VnB()),
+ "umlslt z9.h, z12.b, z30.b");
+ COMPARE_MACRO(Umlslt(z9.VnS(), z9.VnS(), z12.VnH(), z30.VnH()),
+ "umlslt z9.s, z12.h, z30.h");
+
+ COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z3.VnS()),
+ "movprfx z0, z1\n"
+ "smlalt z0.d, z2.s, z3.s");
+ COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z3.VnS()),
+ "movprfx z31, z1\n"
+ "smlalt z31.d, z0.s, z3.s\n"
+ "mov z0.d, z31.d");
+ COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z2.VnS(), z0.VnS()),
+ "movprfx z31, z1\n"
+ "smlalt z31.d, z2.s, z0.s\n"
+ "mov z0.d, z31.d");
+ COMPARE_MACRO(Smlalt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()),
+ "movprfx z31, z1\n"
+ "smlalt z31.d, z0.s, z0.s\n"
+ "mov z0.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the complex integer multiply-accumulate
+// instructions sqrdcmlah and cmla: vector and indexed forms, all four
+// rotations, and the macro-assembler aliasing sequences (z31 used either as
+// a copy of the clobbered source or as the movprfx scratch destination).
+TEST(sve2_complex_integer_multiply_add) {
+ SETUP();
+
+ COMPARE(sqrdcmlah(z31.VnB(), z15.VnB(), z20.VnB(), 0),
+ "sqrdcmlah z31.b, z15.b, z20.b, #0");
+ COMPARE(sqrdcmlah(z31.VnD(), z15.VnD(), z20.VnD(), 90),
+ "sqrdcmlah z31.d, z15.d, z20.d, #90");
+ COMPARE(sqrdcmlah(z31.VnH(), z15.VnH(), z20.VnH(), 180),
+ "sqrdcmlah z31.h, z15.h, z20.h, #180");
+ COMPARE(sqrdcmlah(z31.VnS(), z15.VnS(), z20.VnS(), 270),
+ "sqrdcmlah z31.s, z15.s, z20.s, #270");
+
+ COMPARE(sqrdcmlah(z14.VnS(), z11.VnS(), z8.VnS(), 1, 0),
+ "sqrdcmlah z14.s, z11.s, z8.s[1], #0");
+ COMPARE(sqrdcmlah(z31.VnH(), z2.VnH(), z3.VnH(), 2, 180),
+ "sqrdcmlah z31.h, z2.h, z3.h[2], #180");
+
+ COMPARE_MACRO(Sqrdcmlah(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "sqrdcmlah z0.b, z31.b, z3.b, #0");
+ COMPARE_MACRO(Sqrdcmlah(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "sqrdcmlah z0.h, z2.h, z31.h, #90");
+ COMPARE_MACRO(Sqrdcmlah(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 0, 180),
+ "movprfx z31, z1\n"
+ "sqrdcmlah z31.s, z0.s, z0.s[0], #180\n"
+ "mov z0.d, z31.d");
+ COMPARE_MACRO(Sqrdcmlah(z5.VnH(), z1.VnH(), z2.VnH(), z5.VnH(), 3, 270),
+ "movprfx z31, z1\n"
+ "sqrdcmlah z31.h, z2.h, z5.h[3], #270\n"
+ "mov z5.d, z31.d");
+ COMPARE_MACRO(Sqrdcmlah(z3.VnH(), z3.VnH(), z3.VnH(), z3.VnH(), 2, 90),
+ "sqrdcmlah z3.h, z3.h, z3.h[2], #90");
+
+ COMPARE(cmla(z19.VnB(), z7.VnB(), z2.VnB(), 0), "cmla z19.b, z7.b, z2.b, #0");
+ COMPARE(cmla(z19.VnD(), z7.VnD(), z2.VnD(), 90),
+ "cmla z19.d, z7.d, z2.d, #90");
+ COMPARE(cmla(z19.VnH(), z7.VnH(), z2.VnH(), 180),
+ "cmla z19.h, z7.h, z2.h, #180");
+ COMPARE(cmla(z19.VnS(), z7.VnS(), z2.VnS(), 270),
+ "cmla z19.s, z7.s, z2.s, #270");
+
+ COMPARE_MACRO(Cmla(z0.VnB(), z1.VnB(), z0.VnB(), z3.VnB(), 0),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "cmla z0.b, z31.b, z3.b, #0");
+ COMPARE_MACRO(Cmla(z0.VnH(), z1.VnH(), z2.VnH(), z0.VnH(), 90),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "cmla z0.h, z2.h, z31.h, #90");
+ COMPARE_MACRO(Cmla(z0.VnS(), z1.VnS(), z0.VnS(), z0.VnS(), 180),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "cmla z0.s, z31.s, z31.s, #180");
+ COMPARE_MACRO(Cmla(z0.VnD(), z1.VnD(), z2.VnD(), z0.VnD(), 270),
+ "mov z31.d, z0.d\n"
+ "movprfx z0, z1\n"
+ "cmla z0.d, z2.d, z31.d, #270");
+
+ COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z0.VnS(), 1, 0),
+ "cmla z17.s, z29.s, z0.s[1], #0");
+ COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z1.VnS(), 0, 0),
+ "cmla z17.s, z29.s, z1.s[0], #0");
+ COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z8.VnS(), 1, 90),
+ "cmla z17.s, z29.s, z8.s[1], #90");
+ COMPARE_MACRO(Cmla(z17.VnS(), z17.VnS(), z29.VnS(), z15.VnS(), 0, 180),
+ "cmla z17.s, z29.s, z15.s[0], #180");
+ COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z0.VnH(), 3, 0),
+ "cmla z18.h, z22.h, z0.h[3], #0");
+ COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z1.VnH(), 2, 0),
+ "cmla z18.h, z22.h, z1.h[2], #0");
+ COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z4.VnH(), 1, 270),
+ "cmla z18.h, z22.h, z4.h[1], #270");
+ COMPARE_MACRO(Cmla(z18.VnH(), z18.VnH(), z22.VnH(), z7.VnH(), 0, 90),
+ "cmla z18.h, z22.h, z7.h[0], #90");
+
+ COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z7.VnH(), 0, 90),
+ "movprfx z1, z19\n"
+ "cmla z1.h, z22.h, z7.h[0], #90");
+ COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z7.VnH(), 0, 90),
+ "movprfx z31, z19\n"
+ "cmla z31.h, z1.h, z7.h[0], #90\n"
+ "mov z1.d, z31.d");
+ COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z22.VnH(), z1.VnH(), 0, 90),
+ "movprfx z31, z19\n"
+ "cmla z31.h, z22.h, z1.h[0], #90\n"
+ "mov z1.d, z31.d");
+ COMPARE_MACRO(Cmla(z1.VnH(), z19.VnH(), z1.VnH(), z1.VnH(), 0, 90),
+ "movprfx z31, z19\n"
+ "cmla z31.h, z1.h, z1.h[0], #90\n"
+ "mov z1.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the saturating doubling widening multiply-accumulate
+// family (sqdmlalb/t, sqdmlslb/t): raw assembler vector and indexed forms,
+// then macro-assembler movprfx / z31-scratch handling for aliasing operands.
+TEST(sve2_saturating_multiply_add_long) {
+ SETUP();
+
+ COMPARE(sqdmlalb(z6.VnD(), z19.VnS(), z25.VnS()),
+ "sqdmlalb z6.d, z19.s, z25.s");
+ COMPARE(sqdmlalb(z6.VnH(), z19.VnB(), z25.VnB()),
+ "sqdmlalb z6.h, z19.b, z25.b");
+ COMPARE(sqdmlalb(z6.VnS(), z19.VnH(), z25.VnH()),
+ "sqdmlalb z6.s, z19.h, z25.h");
+ COMPARE(sqdmlalt(z11.VnD(), z0.VnS(), z10.VnS()),
+ "sqdmlalt z11.d, z0.s, z10.s");
+ COMPARE(sqdmlalt(z11.VnH(), z0.VnB(), z10.VnB()),
+ "sqdmlalt z11.h, z0.b, z10.b");
+ COMPARE(sqdmlalt(z11.VnS(), z0.VnH(), z10.VnH()),
+ "sqdmlalt z11.s, z0.h, z10.h");
+ COMPARE(sqdmlslb(z16.VnD(), z26.VnS(), z25.VnS()),
+ "sqdmlslb z16.d, z26.s, z25.s");
+ COMPARE(sqdmlslb(z16.VnH(), z26.VnB(), z25.VnB()),
+ "sqdmlslb z16.h, z26.b, z25.b");
+ COMPARE(sqdmlslb(z16.VnS(), z26.VnH(), z25.VnH()),
+ "sqdmlslb z16.s, z26.h, z25.h");
+ COMPARE(sqdmlslt(z21.VnD(), z23.VnS(), z9.VnS()),
+ "sqdmlslt z21.d, z23.s, z9.s");
+ COMPARE(sqdmlslt(z21.VnH(), z23.VnB(), z9.VnB()),
+ "sqdmlslt z21.h, z23.b, z9.b");
+ COMPARE(sqdmlslt(z21.VnS(), z23.VnH(), z9.VnH()),
+ "sqdmlslt z21.s, z23.h, z9.h");
+
+ COMPARE(sqdmlalb(z1.VnD(), z27.VnS(), z11.VnS(), 0),
+ "sqdmlalb z1.d, z27.s, z11.s[0]");
+ COMPARE(sqdmlalb(z30.VnS(), z6.VnH(), z3.VnH(), 0),
+ "sqdmlalb z30.s, z6.h, z3.h[0]");
+ COMPARE(sqdmlalt(z30.VnD(), z25.VnS(), z15.VnS(), 1),
+ "sqdmlalt z30.d, z25.s, z15.s[1]");
+ COMPARE(sqdmlalt(z10.VnS(), z1.VnH(), z1.VnH(), 3),
+ "sqdmlalt z10.s, z1.h, z1.h[3]");
+ COMPARE(sqdmlslb(z15.VnD(), z27.VnS(), z15.VnS(), 2),
+ "sqdmlslb z15.d, z27.s, z15.s[2]");
+ COMPARE(sqdmlslb(z5.VnS(), z5.VnH(), z7.VnH(), 6),
+ "sqdmlslb z5.s, z5.h, z7.h[6]");
+ COMPARE(sqdmlslt(z21.VnD(), z28.VnS(), z13.VnS(), 3),
+ "sqdmlslt z21.d, z28.s, z13.s[3]");
+ COMPARE(sqdmlslt(z5.VnS(), z3.VnH(), z1.VnH(), 7),
+ "sqdmlslt z5.s, z3.h, z1.h[7]");
+
+ COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z19.VnS(), z25.VnS()),
+ "movprfx z6, z16\n"
+ "sqdmlalb z6.d, z19.s, z25.s");
+ COMPARE_MACRO(Sqdmlalt(z4.VnH(), z26.VnH(), z4.VnB(), z24.VnB()),
+ "movprfx z31, z26\n"
+ "sqdmlalt z31.h, z4.b, z24.b\n"
+ "mov z4.d, z31.d");
+ COMPARE_MACRO(Sqdmlslb(z2.VnS(), z6.VnS(), z17.VnH(), z2.VnH()),
+ "movprfx z31, z6\n"
+ "sqdmlslb z31.s, z17.h, z2.h\n"
+ "mov z2.d, z31.d");
+ COMPARE_MACRO(Sqdmlslt(z0.VnD(), z1.VnD(), z0.VnS(), z0.VnS()),
+ "movprfx z31, z1\n"
+ "sqdmlslt z31.d, z0.s, z0.s\n"
+ "mov z0.d, z31.d");
+
+ COMPARE_MACRO(Sqdmlalb(z6.VnD(), z16.VnD(), z9.VnS(), z15.VnS(), 0),
+ "movprfx z6, z16\n"
+ "sqdmlalb z6.d, z9.s, z15.s[0]");
+ COMPARE_MACRO(Sqdmlalt(z4.VnS(), z6.VnS(), z4.VnH(), z4.VnH(), 3),
+ "movprfx z31, z6\n"
+ "sqdmlalt z31.s, z4.h, z4.h[3]\n"
+ "mov z4.d, z31.d");
+ COMPARE_MACRO(Sqdmlslb(z2.VnS(), z16.VnS(), z17.VnH(), z2.VnH(), 6),
+ "movprfx z31, z16\n"
+ "sqdmlslb z31.s, z17.h, z2.h[6]\n"
+ "mov z2.d, z31.d");
+ COMPARE_MACRO(Sqdmlslt(z6.VnD(), z1.VnD(), z6.VnS(), z6.VnS(), 2),
+ "movprfx z31, z1\n"
+ "sqdmlslt z31.d, z6.s, z6.s[2]\n"
+ "mov z6.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the interleaved saturating doubling
+// multiply-accumulate instructions sqdmlalbt and sqdmlslbt, including the
+// macro-assembler z31-scratch sequence when a source aliases the
+// destination (the final case omits the movprfx because it would be a
+// redundant z31-to-z31 prefix).
+TEST(sve2_saturating_multiply_add_interleaved_long) {
+ SETUP();
+
+ COMPARE(sqdmlalbt(z23.VnD(), z29.VnS(), z26.VnS()),
+ "sqdmlalbt z23.d, z29.s, z26.s");
+ COMPARE(sqdmlalbt(z23.VnH(), z29.VnB(), z26.VnB()),
+ "sqdmlalbt z23.h, z29.b, z26.b");
+ COMPARE(sqdmlalbt(z23.VnS(), z29.VnH(), z26.VnH()),
+ "sqdmlalbt z23.s, z29.h, z26.h");
+ COMPARE(sqdmlslbt(z26.VnD(), z23.VnS(), z4.VnS()),
+ "sqdmlslbt z26.d, z23.s, z4.s");
+ COMPARE(sqdmlslbt(z26.VnH(), z23.VnB(), z4.VnB()),
+ "sqdmlslbt z26.h, z23.b, z4.b");
+ COMPARE(sqdmlslbt(z26.VnS(), z23.VnH(), z4.VnH()),
+ "sqdmlslbt z26.s, z23.h, z4.h");
+
+ COMPARE_MACRO(Sqdmlalbt(z29.VnD(), z0.VnD(), z29.VnS(), z26.VnS()),
+ "movprfx z31, z0\n"
+ "sqdmlalbt z31.d, z29.s, z26.s\n"
+ "mov z29.d, z31.d");
+ COMPARE_MACRO(Sqdmlalbt(z26.VnH(), z0.VnH(), z29.VnB(), z26.VnB()),
+ "movprfx z31, z0\n"
+ "sqdmlalbt z31.h, z29.b, z26.b\n"
+ "mov z26.d, z31.d");
+ COMPARE_MACRO(Sqdmlslbt(z23.VnS(), z31.VnS(), z26.VnH(), z29.VnH()),
+ "movprfx z23, z31\n"
+ "sqdmlslbt z23.s, z26.h, z29.h");
+ COMPARE_MACRO(Sqdmlslbt(z4.VnD(), z31.VnD(), z4.VnS(), z4.VnS()),
+ "sqdmlslbt z31.d, z4.s, z4.s\n"
+ "mov z4.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the half-to-single widening floating-point
+// multiply-accumulate instructions fmlalb/fmlalt and fmlslb/fmlslt
+// (S-lane destination, H-lane sources only).
+TEST(sve2_floating_multiply_add_long_vector) {
+ SETUP();
+
+ COMPARE(fmlalb(z16.VnS(), z18.VnH(), z29.VnH()),
+ "fmlalb z16.s, z18.h, z29.h");
+ COMPARE(fmlalb(z3.VnS(), z8.VnH(), z7.VnH()), "fmlalb z3.s, z8.h, z7.h");
+ COMPARE(fmlalt(z18.VnS(), z13.VnH(), z5.VnH()), "fmlalt z18.s, z13.h, z5.h");
+ COMPARE(fmlalt(z18.VnS(), z7.VnH(), z16.VnH()), "fmlalt z18.s, z7.h, z16.h");
+ COMPARE(fmlslb(z16.VnS(), z10.VnH(), z1.VnH()), "fmlslb z16.s, z10.h, z1.h");
+ COMPARE(fmlslb(z25.VnS(), z11.VnH(), z0.VnH()), "fmlslb z25.s, z11.h, z0.h");
+ COMPARE(fmlslt(z3.VnS(), z17.VnH(), z14.VnH()), "fmlslt z3.s, z17.h, z14.h");
+ COMPARE(fmlslt(z5.VnS(), z1.VnH(), z7.VnH()), "fmlslt z5.s, z1.h, z7.h");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the indexed widening multiply-accumulate family
+// (smlalb/t, smlslb/t, umlalb/t, umlslb/t with an element index), plus the
+// macro-assembler movprfx / z31-scratch aliasing cases.
+TEST(sve2_mla_long_index) {
+ SETUP();
+
+ COMPARE_MACRO(Smlalb(z11.VnD(), z11.VnD(), z29.VnS(), z0.VnS(), 3),
+ "smlalb z11.d, z29.s, z0.s[3]");
+ COMPARE_MACRO(Smlalb(z18.VnS(), z18.VnS(), z17.VnH(), z0.VnH(), 7),
+ "smlalb z18.s, z17.h, z0.h[7]");
+ COMPARE_MACRO(Smlalt(z10.VnD(), z10.VnD(), z30.VnS(), z15.VnS(), 0),
+ "smlalt z10.d, z30.s, z15.s[0]");
+ COMPARE_MACRO(Smlalt(z23.VnS(), z23.VnS(), z31.VnH(), z7.VnH(), 0),
+ "smlalt z23.s, z31.h, z7.h[0]");
+ COMPARE_MACRO(Smlslb(z12.VnD(), z12.VnD(), z23.VnS(), z3.VnS(), 1),
+ "smlslb z12.d, z23.s, z3.s[1]");
+ COMPARE_MACRO(Smlslb(z5.VnS(), z5.VnS(), z4.VnH(), z4.VnH(), 2),
+ "smlslb z5.s, z4.h, z4.h[2]");
+ COMPARE_MACRO(Smlslt(z7.VnD(), z7.VnD(), z9.VnS(), z6.VnS(), 3),
+ "smlslt z7.d, z9.s, z6.s[3]");
+ COMPARE_MACRO(Smlslt(z9.VnS(), z9.VnS(), z21.VnH(), z3.VnH(), 4),
+ "smlslt z9.s, z21.h, z3.h[4]");
+ COMPARE_MACRO(Umlalb(z9.VnD(), z9.VnD(), z1.VnS(), z11.VnS(), 0),
+ "umlalb z9.d, z1.s, z11.s[0]");
+ COMPARE_MACRO(Umlalb(z9.VnS(), z9.VnS(), z5.VnH(), z1.VnH(), 6),
+ "umlalb z9.s, z5.h, z1.h[6]");
+ COMPARE_MACRO(Umlalt(z6.VnD(), z6.VnD(), z17.VnS(), z14.VnS(), 1),
+ "umlalt z6.d, z17.s, z14.s[1]");
+ COMPARE_MACRO(Umlalt(z9.VnS(), z9.VnS(), z11.VnH(), z3.VnH(), 7),
+ "umlalt z9.s, z11.h, z3.h[7]");
+ COMPARE_MACRO(Umlslb(z12.VnD(), z12.VnD(), z15.VnS(), z9.VnS(), 2),
+ "umlslb z12.d, z15.s, z9.s[2]");
+ COMPARE_MACRO(Umlslb(z14.VnS(), z14.VnS(), z10.VnH(), z2.VnH(), 0),
+ "umlslb z14.s, z10.h, z2.h[0]");
+ COMPARE_MACRO(Umlslt(z12.VnD(), z12.VnD(), z28.VnS(), z8.VnS(), 3),
+ "umlslt z12.d, z28.s, z8.s[3]");
+ COMPARE_MACRO(Umlslt(z24.VnS(), z24.VnS(), z12.VnH(), z6.VnH(), 1),
+ "umlslt z24.s, z12.h, z6.h[1]");
+
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z6.VnH(), 1),
+ "movprfx z2, z23\n"
+ "umlslt z2.s, z12.h, z6.h[1]");
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z6.VnH(), 1),
+ "movprfx z31, z23\n"
+ "umlslt z31.s, z2.h, z6.h[1]\n"
+ "mov z2.d, z31.d");
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z12.VnH(), z2.VnH(), 1),
+ "movprfx z31, z23\n"
+ "umlslt z31.s, z12.h, z2.h[1]\n"
+ "mov z2.d, z31.d");
+ COMPARE_MACRO(Umlslt(z2.VnS(), z23.VnS(), z2.VnH(), z2.VnH(), 1),
+ "movprfx z31, z23\n"
+ "umlslt z31.s, z2.h, z2.h[1]\n"
+ "mov z2.d, z31.d");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the indexed widening multiplies smullb/t and
+// umullb/t, covering the S-destination (index 0-7) and D-destination
+// (index 0-3) forms.
+TEST(sve2_mul_long_index) {
+ SETUP();
+
+ COMPARE_MACRO(Smullb(z13.VnS(), z31.VnH(), z0.VnH(), 0),
+ "smullb z13.s, z31.h, z0.h[0]");
+ COMPARE_MACRO(Smullb(z8.VnD(), z22.VnS(), z0.VnS(), 0),
+ "smullb z8.d, z22.s, z0.s[0]");
+ COMPARE_MACRO(Smullt(z14.VnS(), z30.VnH(), z7.VnH(), 7),
+ "smullt z14.s, z30.h, z7.h[7]");
+ COMPARE_MACRO(Smullt(z22.VnD(), z28.VnS(), z15.VnS(), 3),
+ "smullt z22.d, z28.s, z15.s[3]");
+ COMPARE_MACRO(Umullb(z24.VnD(), z20.VnS(), z5.VnS(), 1),
+ "umullb z24.d, z20.s, z5.s[1]");
+ COMPARE_MACRO(Umullb(z28.VnS(), z19.VnH(), z3.VnH(), 4),
+ "umullb z28.s, z19.h, z3.h[4]");
+ COMPARE_MACRO(Umullt(z0.VnD(), z31.VnS(), z8.VnS(), 2),
+ "umullt z0.d, z31.s, z8.s[2]");
+ COMPARE_MACRO(Umullt(z14.VnS(), z20.VnH(), z5.VnH(), 6),
+ "umullt z14.s, z20.h, z5.h[6]");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the unpredicated saturating doubling multiply
+// high-half instructions sqdmulh and sqrdmulh at every lane size.
+TEST(sve2_sat_double_mul_high) {
+ SETUP();
+
+ COMPARE_MACRO(Sqdmulh(z18.VnB(), z25.VnB(), z1.VnB()),
+ "sqdmulh z18.b, z25.b, z1.b");
+ COMPARE_MACRO(Sqdmulh(z18.VnD(), z25.VnD(), z1.VnD()),
+ "sqdmulh z18.d, z25.d, z1.d");
+ COMPARE_MACRO(Sqdmulh(z18.VnH(), z25.VnH(), z1.VnH()),
+ "sqdmulh z18.h, z25.h, z1.h");
+ COMPARE_MACRO(Sqdmulh(z18.VnS(), z25.VnS(), z1.VnS()),
+ "sqdmulh z18.s, z25.s, z1.s");
+ COMPARE_MACRO(Sqrdmulh(z21.VnB(), z21.VnB(), z27.VnB()),
+ "sqrdmulh z21.b, z21.b, z27.b");
+ COMPARE_MACRO(Sqrdmulh(z21.VnD(), z21.VnD(), z27.VnD()),
+ "sqrdmulh z21.d, z21.d, z27.d");
+ COMPARE_MACRO(Sqrdmulh(z21.VnH(), z21.VnH(), z27.VnH()),
+ "sqrdmulh z21.h, z21.h, z27.h");
+ COMPARE_MACRO(Sqrdmulh(z21.VnS(), z21.VnS(), z27.VnS()),
+ "sqrdmulh z21.s, z21.s, z27.s");
+
+ CLEANUP();
+}
+
+// Checks disassembly of the predicated FLOGB (floating-point base-2
+// logarithm of the exponent). The zeroing-predicate case shows the
+// macro-assembler emitting a zeroing movprfx before the merging
+// instruction form.
+TEST(sve2_flogb) {
+ SETUP();
+
+ COMPARE_MACRO(Flogb(z15.VnH(), p0.Merging(), z3.VnH()),
+ "flogb z15.h, p0/m, z3.h");
+ COMPARE_MACRO(Flogb(z15.VnS(), p0.Merging(), z3.VnS()),
+ "flogb z15.s, p0/m, z3.s");
+ COMPARE_MACRO(Flogb(z15.VnD(), p0.Merging(), z3.VnD()),
+ "flogb z15.d, p0/m, z3.d");
+ COMPARE_MACRO(Flogb(z15.VnD(), p0.Zeroing(), z3.VnD()),
+ "movprfx z15.d, p0/z, z15.d\n"
+ "flogb z15.d, p0/m, z3.d");
+
+ CLEANUP();
+}
+
+// Check disassembly of the SVE2 predicated floating-point pairwise
+// arithmetic instructions (FADDP, FMAXNMP, FMAXP, FMINNMP, FMINP).
+TEST(sve2_fp_pair) {
+  SETUP();
+
+  // Destructive forms: destination equals the first source, so each macro
+  // maps directly onto a single instruction.
+  COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z14.VnD(), z26.VnD()),
+                "faddp z14.d, p1/m, z14.d, z26.d");
+  COMPARE_MACRO(Faddp(z14.VnH(), p1.Merging(), z14.VnH(), z26.VnH()),
+                "faddp z14.h, p1/m, z14.h, z26.h");
+  COMPARE_MACRO(Faddp(z14.VnS(), p1.Merging(), z14.VnS(), z26.VnS()),
+                "faddp z14.s, p1/m, z14.s, z26.s");
+  COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z2.VnD(), z14.VnD()),
+                "fmaxnmp z2.d, p1/m, z2.d, z14.d");
+  COMPARE_MACRO(Fmaxnmp(z2.VnH(), p1.Merging(), z2.VnH(), z14.VnH()),
+                "fmaxnmp z2.h, p1/m, z2.h, z14.h");
+  COMPARE_MACRO(Fmaxnmp(z2.VnS(), p1.Merging(), z2.VnS(), z14.VnS()),
+                "fmaxnmp z2.s, p1/m, z2.s, z14.s");
+  COMPARE_MACRO(Fmaxp(z22.VnD(), p1.Merging(), z22.VnD(), z3.VnD()),
+                "fmaxp z22.d, p1/m, z22.d, z3.d");
+  COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z22.VnH(), z3.VnH()),
+                "fmaxp z22.h, p1/m, z22.h, z3.h");
+  COMPARE_MACRO(Fmaxp(z22.VnS(), p1.Merging(), z22.VnS(), z3.VnS()),
+                "fmaxp z22.s, p1/m, z22.s, z3.s");
+  COMPARE_MACRO(Fminnmp(z1.VnD(), p0.Merging(), z1.VnD(), z14.VnD()),
+                "fminnmp z1.d, p0/m, z1.d, z14.d");
+  COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z1.VnH(), z14.VnH()),
+                "fminnmp z1.h, p0/m, z1.h, z14.h");
+  COMPARE_MACRO(Fminnmp(z1.VnS(), p0.Merging(), z1.VnS(), z14.VnS()),
+                "fminnmp z1.s, p0/m, z1.s, z14.s");
+  COMPARE_MACRO(Fminp(z16.VnD(), p3.Merging(), z16.VnD(), z11.VnD()),
+                "fminp z16.d, p3/m, z16.d, z11.d");
+  COMPARE_MACRO(Fminp(z16.VnH(), p3.Merging(), z16.VnH(), z11.VnH()),
+                "fminp z16.h, p3/m, z16.h, z11.h");
+  COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z16.VnS(), z11.VnS()),
+                "fminp z16.s, p3/m, z16.s, z11.s");
+
+  // Non-destructive forms: the MacroAssembler must synthesise the move into
+  // the destination with movprfx, spilling through z31 when the destination
+  // aliases the second source.
+  COMPARE_MACRO(Faddp(z14.VnD(), p1.Merging(), z13.VnD(), z26.VnD()),
+                "movprfx z14.d, p1/m, z13.d\n"
+                "faddp z14.d, p1/m, z14.d, z26.d");
+  COMPARE_MACRO(Fmaxnmp(z2.VnD(), p1.Merging(), z3.VnD(), z2.VnD()),
+                "mov z31.d, z2.d\n"
+                "movprfx z2.d, p1/m, z3.d\n"
+                "fmaxnmp z2.d, p1/m, z2.d, z31.d");
+  COMPARE_MACRO(Fmaxp(z22.VnH(), p1.Merging(), z23.VnH(), z3.VnH()),
+                "movprfx z22.h, p1/m, z23.h\n"
+                "fmaxp z22.h, p1/m, z22.h, z3.h");
+  COMPARE_MACRO(Fminnmp(z1.VnH(), p0.Merging(), z4.VnH(), z1.VnH()),
+                "mov z31.d, z1.d\n"
+                "movprfx z1.h, p0/m, z4.h\n"
+                "fminnmp z1.h, p0/m, z1.h, z31.h");
+  COMPARE_MACRO(Fminp(z16.VnS(), p3.Merging(), z11.VnS(), z11.VnS()),
+                "movprfx z16.s, p3/m, z11.s\n"
+                "fminp z16.s, p3/m, z16.s, z11.s");
+  CLEANUP();
+}
+
+// Check disassembly of the SVE2 half-to-single widening multiply-add/sub
+// long instructions with an indexed element (FMLALB/T, FMLSLB/T).
+TEST(sve2_fmlal_fmlsl_index) {
+  SETUP();
+
+  // Accumulator equals the destination: single-instruction encodings, with
+  // index values covering the 0-7 range.
+  COMPARE_MACRO(Fmlalb(z16.VnS(), z16.VnS(), z18.VnH(), z2.VnH(), 0),
+                "fmlalb z16.s, z18.h, z2.h[0]");
+  COMPARE_MACRO(Fmlalb(z3.VnS(), z3.VnS(), z8.VnH(), z7.VnH(), 7),
+                "fmlalb z3.s, z8.h, z7.h[7]");
+  COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z13.VnH(), z5.VnH(), 6),
+                "fmlalt z18.s, z13.h, z5.h[6]");
+  COMPARE_MACRO(Fmlalt(z18.VnS(), z18.VnS(), z7.VnH(), z6.VnH(), 5),
+                "fmlalt z18.s, z7.h, z6.h[5]");
+  COMPARE_MACRO(Fmlslb(z16.VnS(), z16.VnS(), z10.VnH(), z1.VnH(), 4),
+                "fmlslb z16.s, z10.h, z1.h[4]");
+  COMPARE_MACRO(Fmlslb(z25.VnS(), z25.VnS(), z11.VnH(), z0.VnH(), 3),
+                "fmlslb z25.s, z11.h, z0.h[3]");
+  COMPARE_MACRO(Fmlslt(z3.VnS(), z3.VnS(), z17.VnH(), z4.VnH(), 2),
+                "fmlslt z3.s, z17.h, z4.h[2]");
+  COMPARE_MACRO(Fmlslt(z5.VnS(), z5.VnS(), z1.VnH(), z7.VnH(), 1),
+                "fmlslt z5.s, z1.h, z7.h[1]");
+
+  // Accumulator differs from the destination: movprfx is required, and a
+  // scratch register (z31) is used whenever the destination aliases one of
+  // the multiplicands.
+  COMPARE_MACRO(Fmlalb(z5.VnS(), z4.VnS(), z1.VnH(), z7.VnH(), 1),
+                "movprfx z5, z4\n"
+                "fmlalb z5.s, z1.h, z7.h[1]");
+  COMPARE_MACRO(Fmlalt(z5.VnS(), z4.VnS(), z5.VnH(), z7.VnH(), 1),
+                "movprfx z31, z4\n"
+                "fmlalt z31.s, z5.h, z7.h[1]\n"
+                "mov z5.d, z31.d");
+  COMPARE_MACRO(Fmlslb(z5.VnS(), z4.VnS(), z1.VnH(), z5.VnH(), 1),
+                "movprfx z31, z4\n"
+                "fmlslb z31.s, z1.h, z5.h[1]\n"
+                "mov z5.d, z31.d");
+  COMPARE_MACRO(Fmlslt(z5.VnS(), z4.VnS(), z5.VnH(), z5.VnH(), 1),
+                "movprfx z31, z4\n"
+                "fmlslt z31.s, z5.h, z5.h[1]\n"
+                "mov z5.d, z31.d");
+  CLEANUP();
+}
+
+// Check disassembly of the SVE2 floating-point conversion instructions
+// (FCVTX, FCVTLT, FCVTNT, FCVTXNT).
+TEST(sve2_fp_convert) {
+  SETUP();
+
+  COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()),
+                "fcvtx z14.s, p4/m, z0.d");
+  // Zeroing form is synthesised via a zeroing movprfx.
+  COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Zeroing(), z0.VnD()),
+                "movprfx z14.d, p4/z, z14.d\n"
+                "fcvtx z14.s, p4/m, z0.d");
+  COMPARE_MACRO(Fcvtlt(z1.VnD(), p1.Merging(), z28.VnS()),
+                "fcvtlt z1.d, p1/m, z28.s");
+  COMPARE_MACRO(Fcvtlt(z10.VnS(), p5.Merging(), z0.VnH()),
+                "fcvtlt z10.s, p5/m, z0.h");
+  COMPARE_MACRO(Fcvtnt(z4.VnH(), p7.Merging(), z0.VnS()),
+                "fcvtnt z4.h, p7/m, z0.s");
+  COMPARE_MACRO(Fcvtnt(z8.VnS(), p0.Merging(), z4.VnD()),
+                "fcvtnt z8.s, p0/m, z4.d");
+  // NOTE(review): this Fcvtx merging check duplicates the first one above;
+  // it is harmless but could be removed upstream.
+  COMPARE_MACRO(Fcvtx(z14.VnS(), p4.Merging(), z0.VnD()),
+                "fcvtx z14.s, p4/m, z0.d");
+  COMPARE_MACRO(Fcvtxnt(z27.VnS(), p0.Merging(), z17.VnD()),
+                "fcvtxnt z27.s, p0/m, z17.d");
+
+  CLEANUP();
+}
+
+// Check disassembly of the SVE2 saturating (rounding) doubling multiply
+// high instructions with an indexed element, covering H, S and D lane
+// sizes and the full index range permitted by each encoding.
+TEST(sve2_sat_double_mul_high_index) {
+  SETUP();
+
+  COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z7.VnH(), 1),
+                "sqdmulh z11.h, z20.h, z7.h[1]");
+  COMPARE_MACRO(Sqdmulh(z11.VnH(), z20.VnH(), z2.VnH(), 7),
+                "sqdmulh z11.h, z20.h, z2.h[7]");
+  COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z7.VnS(), 1),
+                "sqdmulh z8.s, z4.s, z7.s[1]");
+  COMPARE_MACRO(Sqdmulh(z8.VnS(), z4.VnS(), z3.VnS(), 3),
+                "sqdmulh z8.s, z4.s, z3.s[3]");
+  COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z0.VnD(), 1),
+                "sqdmulh z6.d, z13.d, z0.d[1]");
+  COMPARE_MACRO(Sqdmulh(z6.VnD(), z13.VnD(), z15.VnD(), 0),
+                "sqdmulh z6.d, z13.d, z15.d[0]");
+
+  // Rounding variant.
+  COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z7.VnH(), 2),
+                "sqrdmulh z3.h, z29.h, z7.h[2]");
+  COMPARE_MACRO(Sqrdmulh(z3.VnH(), z29.VnH(), z3.VnH(), 7),
+                "sqrdmulh z3.h, z29.h, z3.h[7]");
+  COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z7.VnS(), 0),
+                "sqrdmulh z19.s, z15.s, z7.s[0]");
+  COMPARE_MACRO(Sqrdmulh(z19.VnS(), z15.VnS(), z2.VnS(), 3),
+                "sqrdmulh z19.s, z15.s, z2.s[3]");
+  COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z0.VnD(), 1),
+                "sqrdmulh z29.d, z13.d, z0.d[1]");
+  COMPARE_MACRO(Sqrdmulh(z29.VnD(), z13.VnD(), z15.VnD(), 0),
+                "sqrdmulh z29.d, z13.d, z15.d[0]");
+
+  CLEANUP();
+}
+
+// Check disassembly of the SVE2 constructive EXT (extract vector from a
+// register pair) encoding, including the immediate's boundary values, and
+// that the destructive SVE form is still preferred where applicable.
+TEST(sve2_extract) {
+  SETUP();
+
+  COMPARE_MACRO(Ext(z0.VnB(), z1.VnB(), z2.VnB(), 2),
+                "ext z0.b, {z1.b, z2.b}, #2");
+  // Immediate boundary values (0 and 255).
+  COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 255),
+                "ext z0.b, {z31.b, z0.b}, #255");
+  COMPARE_MACRO(Ext(z0.VnB(), z31.VnB(), z0.VnB(), 0),
+                "ext z0.b, {z31.b, z0.b}, #0");
+
+  // Check destructive form is preferred over constructive.
+  COMPARE_MACRO(Ext(z0.VnB(), z0.VnB(), z1.VnB(), 42),
+                "ext z0.b, z0.b, z1.b, #42");
  CLEANUP();
}
+// Check disassembly of the SVE matrix multiply-accumulate instructions
+// (FMMLA, SMMLA, UMMLA, USMMLA).
+TEST(sve_matmul) {
+  SETUP();
+
+  // Destination aliases the accumulator: single-instruction encodings.
+  COMPARE_MACRO(Fmmla(z2.VnS(), z2.VnS(), z3.VnS(), z20.VnS()),
+                "fmmla z2.s, z3.s, z20.s");
+  COMPARE_MACRO(Fmmla(z21.VnD(), z21.VnD(), z30.VnD(), z2.VnD()),
+                "fmmla z21.d, z30.d, z2.d");
+  COMPARE_MACRO(Smmla(z31.VnS(), z31.VnS(), z7.VnB(), z19.VnB()),
+                "smmla z31.s, z7.b, z19.b");
+  COMPARE_MACRO(Ummla(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB()),
+                "ummla z0.s, z1.b, z2.b");
+  COMPARE_MACRO(Usmmla(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()),
+                "usmmla z30.s, z31.b, z4.b");
+
+  // Destination differs from the accumulator: movprfx is required, going
+  // through scratch z31 when the destination aliases a multiplicand.
+  COMPARE_MACRO(Fmmla(z0.VnS(), z1.VnS(), z2.VnS(), z3.VnS()),
+                "movprfx z0, z1\n"
+                "fmmla z0.s, z2.s, z3.s");
+  COMPARE_MACRO(Smmla(z0.VnS(), z1.VnS(), z0.VnB(), z3.VnB()),
+                "movprfx z31, z1\n"
+                "smmla z31.s, z0.b, z3.b\n"
+                "mov z0.d, z31.d");
+  COMPARE_MACRO(Ummla(z0.VnS(), z1.VnS(), z2.VnB(), z0.VnB()),
+                "movprfx z31, z1\n"
+                "ummla z31.s, z2.b, z0.b\n"
+                "mov z0.d, z31.d");
+  COMPARE_MACRO(Usmmla(z0.VnS(), z1.VnS(), z0.VnB(), z0.VnB()),
+                "movprfx z31, z1\n"
+                "usmmla z31.s, z0.b, z0.b\n"
+                "mov z0.d, z31.d");
+
+  CLEANUP();
+}
+
+// Check disassembly of the SVE mixed-sign dot product instructions
+// (USDOT, SUDOT), in both vector and indexed forms.
+TEST(sve_usdot_sudot) {
+  SETUP();
+
+  COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB()),
+                "usdot z30.s, z31.b, z4.b");
+  COMPARE_MACRO(Usdot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 0),
+                "usdot z30.s, z31.b, z4.b[0]");
+  COMPARE_MACRO(Sudot(z30.VnS(), z30.VnS(), z31.VnB(), z4.VnB(), 3),
+                "sudot z30.s, z31.b, z4.b[3]");
+
+  // Destination differs from the accumulator: movprfx is required; a
+  // scratch register (z31) is used whenever the destination also aliases
+  // one of the multiplicands.
+  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z28.VnB()),
+                "movprfx z0, z30\n"
+                "usdot z0.s, z29.b, z28.b");
+  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB()),
+                "movprfx z31, z30\n"
+                "usdot z31.s, z29.b, z0.b\n"
+                "mov z0.d, z31.d");
+  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z28.VnB()),
+                "movprfx z31, z30\n"
+                "usdot z31.s, z0.b, z28.b\n"
+                "mov z0.d, z31.d");
+  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB()),
+                "movprfx z31, z30\n"
+                "usdot z31.s, z0.b, z0.b\n"
+                "mov z0.d, z31.d");
+  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z29.VnB(), z4.VnB(), 0),
+                "movprfx z0, z30\n"
+                "usdot z0.s, z29.b, z4.b[0]");
+  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z4.VnB(), 0),
+                "movprfx z31, z30\n"
+                "usdot z31.s, z0.b, z4.b[0]\n"
+                "mov z0.d, z31.d");
+  COMPARE_MACRO(Sudot(z0.VnS(), z30.VnS(), z29.VnB(), z0.VnB(), 0),
+                "movprfx z31, z30\n"
+                "sudot z31.s, z29.b, z0.b[0]\n"
+                "mov z0.d, z31.d");
+  COMPARE_MACRO(Usdot(z0.VnS(), z30.VnS(), z0.VnB(), z0.VnB(), 0),
+                "movprfx z31, z30\n"
+                "usdot z31.s, z0.b, z0.b[0]\n"
+                "mov z0.d, z31.d");
+
+  CLEANUP();
+}
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
new file mode 100644
index 00000000..58d9f48c
--- /dev/null
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -0,0 +1,271 @@
+// Copyright 2021, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "test-runner.h"
+#include "test-utils.h"
+#include "aarch64/test-utils-aarch64.h"
+
+#include "aarch64/cpu-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#include "test-assembler-aarch64.h"
+
+#define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
+
+namespace vixl {
+namespace aarch64 {
+
+// Simulator golden-state test for the SVE integer matrix multiply
+// instructions (SMMLA/UMMLA/USMMLA). The instruction stream and the
+// expected hashes below are machine-generated against a reference
+// implementation; do not edit the encodings or hash values by hand.
+TEST_SVE(sve_matmul) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVEI8MM,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  // Seed all registers with a fixed pseudo-random machine state so the
+  // final hash is deterministic.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x45179979); // smmla z25.s, z11.b, z23.b
+    // vl128 state = 0xf1ca8a4d
+    __ dci(0x45179b51); // smmla z17.s, z26.b, z23.b
+    // vl128 state = 0x4458ad10
+    __ dci(0x45d79b53); // ummla z19.s, z26.b, z23.b
+    // vl128 state = 0x43d4d064
+    __ dci(0x45d69b17); // ummla z23.s, z24.b, z22.b
+    // vl128 state = 0x601e77c8
+    __ dci(0x45c69b33); // ummla z19.s, z25.b, z6.b
+    // vl128 state = 0x561b4e22
+    __ dci(0x45c49b1b); // ummla z27.s, z24.b, z4.b
+    // vl128 state = 0x89b65d78
+    __ dci(0x45dc9b1a); // ummla z26.s, z24.b, z28.b
+    // vl128 state = 0x85c9e62d
+    __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b
+    // vl128 state = 0x3fc74134
+    __ dci(0x45d99b19); // ummla z25.s, z24.b, z25.b
+    // vl128 state = 0xa2fa347b
+    __ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b
+    // vl128 state = 0xb9854782
+    __ dci(0x45899b1a); // usmmla z26.s, z24.b, z9.b
+    // vl128 state = 0x7fd376d8
+    __ dci(0x45099b8a); // smmla z10.s, z28.b, z9.b
+    // vl128 state = 0xb41d8433
+    __ dci(0x45019bcb); // smmla z11.s, z30.b, z1.b
+    // vl128 state = 0xc9c0e80d
+    __ dci(0x45019bdb); // smmla z27.s, z30.b, z1.b
+    // vl128 state = 0xf1130e02
+    __ dci(0x45019b6b); // smmla z11.s, z27.b, z1.b
+    // vl128 state = 0x282d3dc7
+    __ dci(0x45019b6f); // smmla z15.s, z27.b, z1.b
+    // vl128 state = 0x34570238
+    __ dci(0x45859b6b); // usmmla z11.s, z27.b, z5.b
+    // vl128 state = 0xc451206a
+    __ dci(0x45919b6a); // usmmla z10.s, z27.b, z17.b
+    // vl128 state = 0xa58e2ea8
+    __ dci(0x45909a62); // usmmla z2.s, z19.b, z16.b
+    // vl128 state = 0x7b5f948d
+    __ dci(0x45809a52); // usmmla z18.s, z18.b, z0.b
+    // vl128 state = 0xf746260d
+    __ dci(0x45889b53); // usmmla z19.s, z26.b, z8.b
+    // vl128 state = 0xc31cc539
+    __ dci(0x45809a57); // usmmla z23.s, z18.b, z0.b
+    // vl128 state = 0x736bb3ee
+    __ dci(0x45809a96); // usmmla z22.s, z20.b, z0.b
+    // vl128 state = 0xbb05fef6
+    __ dci(0x45809a92); // usmmla z18.s, z20.b, z0.b
+    // vl128 state = 0xbc594372
+    __ dci(0x45809a82); // usmmla z2.s, z20.b, z0.b
+    // vl128 state = 0x87c5a584
+    __ dci(0x45829ad2); // usmmla z18.s, z22.b, z2.b
+    // vl128 state = 0xa413f733
+    __ dci(0x45889ad6); // usmmla z22.s, z22.b, z8.b
+    // vl128 state = 0x87ec445d
+    __ dci(0x45c898d2); // ummla z18.s, z6.b, z8.b
+    // vl128 state = 0x3ca8a6e5
+    __ dci(0x450898d0); // smmla z16.s, z6.b, z8.b
+    // vl128 state = 0x4300d87b
+    __ dci(0x45189ad8); // smmla z24.s, z22.b, z24.b
+    // vl128 state = 0x38be2e8a
+    __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b
+    // vl128 state = 0x8a3e6103
+    __ dci(0x45989bc9); // usmmla z9.s, z30.b, z24.b
+    // vl128 state = 0xc728e586
+    __ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b
+    // vl128 state = 0x4cb44c0e
+    __ dci(0x459c99d1); // usmmla z17.s, z14.b, z28.b
+    // vl128 state = 0x84ebcb36
+    __ dci(0x459c99d5); // usmmla z21.s, z14.b, z28.b
+    // vl128 state = 0x8813d2e2
+    __ dci(0x451c999d); // smmla z29.s, z12.b, z28.b
+    // vl128 state = 0x8f26ee51
+    __ dci(0x451c999f); // smmla z31.s, z12.b, z28.b
+    // vl128 state = 0x5d626fd0
+    __ dci(0x459e998f); // usmmla z15.s, z12.b, z30.b
+    // vl128 state = 0x6b64cc8f
+    __ dci(0x459f991f); // usmmla z31.s, z8.b, z31.b
+    // vl128 state = 0x41648186
+    __ dci(0x4587991e); // usmmla z30.s, z8.b, z7.b
+    // vl128 state = 0x701525ec
+    __ dci(0x45079816); // smmla z22.s, z0.b, z7.b
+    // vl128 state = 0x61a2d024
+    __ dci(0x450f9897); // smmla z23.s, z4.b, z15.b
+    // vl128 state = 0x82ba6bd5
+    __ dci(0x450b98d3); // smmla z19.s, z6.b, z11.b
+    // vl128 state = 0xa842bbde
+    __ dci(0x450b98db); // smmla z27.s, z6.b, z11.b
+    // vl128 state = 0x9977677a
+    __ dci(0x451f98d3); // smmla z19.s, z6.b, z31.b
+    // vl128 state = 0xe6d6c2ef
+    __ dci(0x451b9adb); // smmla z27.s, z22.b, z27.b
+    // vl128 state = 0xa535453f
+    __ dci(0x450b98d9); // smmla z25.s, z6.b, z11.b
+    // vl128 state = 0xeda3f381
+    __ dci(0x458b9adb); // usmmla z27.s, z22.b, z11.b
+    // vl128 state = 0xd72dbdef
+    __ dci(0x45cb98da); // ummla z26.s, z6.b, z11.b
+    // vl128 state = 0xfae4975b
+    __ dci(0x45c999d2); // ummla z18.s, z14.b, z9.b
+    // vl128 state = 0x0aa6e1f6
+  }
+
+  // Hash the whole machine state and load the result into w0 for checking.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One golden hash per SVE vector length, indexed by the number of
+    // 128-bit lanes (VL128 ... VL2048).
+    uint32_t expected_hashes[] = {
+        0x0aa6e1f6,
+        0xba2d4547,
+        0x0e72a647,
+        0x15b8fc1b,
+        0x92eddc98,
+        0xe0c72bcf,
+        0x36b4e3ba,
+        0x1041114e,
+        0x4d44ebd4,
+        0xfe0e3cbf,
+        0x81c43455,
+        0x678617c5,
+        0xf72fac1f,
+        0xabdcd4e4,
+        0x108864bd,
+        0x035f6eca,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Simulator golden-state test for the single-precision SVE FMMLA
+// instruction. The instruction stream and expected hashes are
+// machine-generated; do not edit the encodings or hash values by hand.
+TEST_SVE(sve_fmatmul_s) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVEF32MM,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  // Seed all registers with a fixed pseudo-random machine state so the
+  // final hash is deterministic.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+    __ dci(0x64a1e6ee); // fmmla z14.s, z23.s, z1.s
+    // vl128 state = 0x9db41bef
+    __ dci(0x64b1e7fe); // fmmla z30.s, z31.s, z17.s
+    // vl128 state = 0xc1535e55
+    __ dci(0x64b9e7d6); // fmmla z22.s, z30.s, z25.s
+    // vl128 state = 0xc65aad35
+    __ dci(0x64bde6c6); // fmmla z6.s, z22.s, z29.s
+    // vl128 state = 0x68387c22
+    __ dci(0x64b9e4c2); // fmmla z2.s, z6.s, z25.s
+    // vl128 state = 0xcf08b3a4
+    __ dci(0x64b9e543); // fmmla z3.s, z10.s, z25.s
+    // vl128 state = 0x969bbe77
+    __ dci(0x64b9e553); // fmmla z19.s, z10.s, z25.s
+    // vl128 state = 0xc3f514e1
+    __ dci(0x64b9e557); // fmmla z23.s, z10.s, z25.s
+    // vl128 state = 0x4b351c29
+    __ dci(0x64b9e773); // fmmla z19.s, z27.s, z25.s
+    // vl128 state = 0x5e026315
+    __ dci(0x64bbe757); // fmmla z23.s, z26.s, z27.s
+    // vl128 state = 0x61684fe6
+    __ dci(0x64bbe755); // fmmla z21.s, z26.s, z27.s
+    // vl128 state = 0x719b4ce0
+    __ dci(0x64bfe554); // fmmla z20.s, z10.s, z31.s
+    // vl128 state = 0xdf3d2a1c
+    __ dci(0x64bfe550); // fmmla z16.s, z10.s, z31.s
+    // vl128 state = 0x3279aab8
+    __ dci(0x64bfe714); // fmmla z20.s, z24.s, z31.s
+    // vl128 state = 0x0b985869
+    __ dci(0x64b7e756); // fmmla z22.s, z26.s, z23.s
+    // vl128 state = 0x14230587
+    __ dci(0x64b7e737); // fmmla z23.s, z25.s, z23.s
+    // vl128 state = 0x2cb88e7f
+    __ dci(0x64bfe767); // fmmla z7.s, z27.s, z31.s
+    // vl128 state = 0xb5ec0c65
+    __ dci(0x64bfe777); // fmmla z23.s, z27.s, z31.s
+    // vl128 state = 0xb5e5eab0
+    __ dci(0x64bfe715); // fmmla z21.s, z24.s, z31.s
+    // vl128 state = 0xd0491fb5
+    __ dci(0x64b7e797); // fmmla z23.s, z28.s, z23.s
+    // vl128 state = 0x98a55a30
+  }
+
+  // Hash the whole machine state and load the result into w0 for checking.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One golden hash per SVE vector length, indexed by the number of
+    // 128-bit lanes (VL128 ... VL2048).
+    uint32_t expected_hashes[] = {
+        0x98a55a30,
+        0x590b7715,
+        0x4562ccf3,
+        0x1f8653a6,
+        0x5fe174d5,
+        0xb300dcb8,
+        0x3cefa79e,
+        0xa22484c7,
+        0x380697ec,
+        0xde9e699b,
+        0x99d21870,
+        0x456cb46b,
+        0x207d2615,
+        0xecaf9678,
+        0x0949e2d2,
+        0xa764c43f,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+} // namespace aarch64
+} // namespace vixl
diff --git a/test/aarch64/test-simulator-sve2-aarch64.cc b/test/aarch64/test-simulator-sve2-aarch64.cc
new file mode 100644
index 00000000..0a4c6d13
--- /dev/null
+++ b/test/aarch64/test-simulator-sve2-aarch64.cc
@@ -0,0 +1,9122 @@
+// Copyright 2020, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <cfloat>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+
+#include "test-runner.h"
+#include "test-utils.h"
+#include "aarch64/test-utils-aarch64.h"
+
+#include "aarch64/cpu-aarch64.h"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#include "aarch64/simulator-aarch64.h"
+#include "test-assembler-aarch64.h"
+
+#define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
+
+namespace vixl {
+namespace aarch64 {
+
+// Simulator golden-state test for the SVE2 predicated halving arithmetic
+// instructions (SHADD/UHADD, SHSUB/UHSUB, SHSUBR/UHSUBR, SRHADD/URHADD).
+// The instruction stream and expected hashes are machine-generated; do
+// not edit the encodings or hash values by hand.
+TEST_SVE(sve2_halving_arithmetic) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  // Seed all registers with a fixed pseudo-random machine state so the
+  // final hash is deterministic.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x441182b2); // uhadd z18.b, p0/m, z18.b, z21.b
+    // vl128 state = 0x8ac2942a
+    __ dci(0x441382f3); // uhsub z19.b, p0/m, z19.b, z23.b
+    // vl128 state = 0x0e0db643
+    __ dci(0x449383fb); // uhsub z27.s, p0/m, z27.s, z31.s
+    // vl128 state = 0x6a97fc8c
+    __ dci(0x441283fa); // shsub z26.b, p0/m, z26.b, z31.b
+    // vl128 state = 0x48a5fd5f
+    __ dci(0x44928372); // shsub z18.s, p0/m, z18.s, z27.s
+    // vl128 state = 0x7c670d36
+    __ dci(0x44d2827a); // shsub z26.d, p0/m, z26.d, z19.d
+    // vl128 state = 0x3a15c66f
+    __ dci(0x4492823b); // shsub z27.s, p0/m, z27.s, z17.s
+    // vl128 state = 0xe407c826
+    __ dci(0x44978239); // uhsubr z25.s, p0/m, z25.s, z17.s
+    // vl128 state = 0xf7157dae
+    __ dci(0x4493827d); // uhsub z29.s, p0/m, z29.s, z19.s
+    // vl128 state = 0xcebff22f
+    __ dci(0x449782f9); // uhsubr z25.s, p0/m, z25.s, z23.s
+    // vl128 state = 0xbe691139
+    __ dci(0x44978231); // uhsubr z17.s, p0/m, z17.s, z17.s
+    // vl128 state = 0x59b2af72
+    __ dci(0x44578233); // uhsubr z19.h, p0/m, z19.h, z17.h
+    // vl128 state = 0xd7fad727
+    __ dci(0x44578312); // uhsubr z18.h, p0/m, z18.h, z24.h
+    // vl128 state = 0x87b5d00a
+    __ dci(0x44578610); // uhsubr z16.h, p1/m, z16.h, z16.h
+    // vl128 state = 0xbaae097d
+    __ dci(0x44578618); // uhsubr z24.h, p1/m, z24.h, z16.h
+    // vl128 state = 0x3887509e
+    __ dci(0x44168608); // shsubr z8.b, p1/m, z8.b, z16.b
+    // vl128 state = 0xc16dc63b
+    __ dci(0x44128700); // shsub z0.b, p1/m, z0.b, z24.b
+    // vl128 state = 0x3eddcd6d
+    __ dci(0x44528f02); // shsub z2.h, p3/m, z2.h, z24.h
+    // vl128 state = 0x2e7ffa0d
+    __ dci(0x44538f40); // uhsub z0.h, p3/m, z0.h, z26.h
+    // vl128 state = 0x1f68bee5
+    __ dci(0x44538342); // uhsub z2.h, p0/m, z2.h, z26.h
+    // vl128 state = 0x2a368049
+    __ dci(0x44538040); // uhsub z0.h, p0/m, z0.h, z2.h
+    // vl128 state = 0x0537f844
+    __ dci(0x44568044); // shsubr z4.h, p0/m, z4.h, z2.h
+    // vl128 state = 0x0dfac1b2
+    __ dci(0x445688cc); // shsubr z12.h, p2/m, z12.h, z6.h
+    // vl128 state = 0xbefa909b
+    __ dci(0x44d288dc); // shsub z28.d, p2/m, z28.d, z6.d
+    // vl128 state = 0xbadc14bb
+    __ dci(0x44d288d8); // shsub z24.d, p2/m, z24.d, z6.d
+    // vl128 state = 0x518130c0
+    __ dci(0x44d088f0); // shadd z16.d, p2/m, z16.d, z7.d
+    // vl128 state = 0xb01856bd
+    __ dci(0x44d08cd2); // shadd z18.d, p3/m, z18.d, z6.d
+    // vl128 state = 0xbbcfeaa2
+    __ dci(0x44d484d0); // srhadd z16.d, p1/m, z16.d, z6.d
+    // vl128 state = 0xefe1d416
+    __ dci(0x44d496d1); // srhadd z17.d, p5/m, z17.d, z22.d
+    // vl128 state = 0xceb574b8
+    __ dci(0x44d196d5); // uhadd z21.d, p5/m, z21.d, z22.d
+    // vl128 state = 0x46cdd268
+    __ dci(0x44d496dd); // srhadd z29.d, p5/m, z29.d, z22.d
+    // vl128 state = 0x21a81b6a
+    __ dci(0x4494969c); // srhadd z28.s, p5/m, z28.s, z20.s
+    // vl128 state = 0x2316cb04
+    __ dci(0x4494968c); // srhadd z12.s, p5/m, z12.s, z20.s
+    // vl128 state = 0x6248cc0a
+    __ dci(0x4415968d); // urhadd z13.b, p5/m, z13.b, z20.b
+    // vl128 state = 0x6edd11e0
+    __ dci(0x44119e8c); // uhadd z12.b, p7/m, z12.b, z20.b
+    // vl128 state = 0x81841eb6
+    __ dci(0x4491968d); // uhadd z13.s, p5/m, z13.s, z20.s
+    // vl128 state = 0x02b8b893
+    __ dci(0x44118685); // uhadd z5.b, p1/m, z5.b, z20.b
+    // vl128 state = 0x707db891
+    __ dci(0x44138e8d); // uhsub z13.b, p3/m, z13.b, z20.b
+    // vl128 state = 0x2caa64dd
+    __ dci(0x44139e0c); // uhsub z12.b, p7/m, z12.b, z16.b
+    // vl128 state = 0xe34695ef
+    __ dci(0x44128e0d); // shsub z13.b, p3/m, z13.b, z16.b
+    // vl128 state = 0x477197dd
+    __ dci(0x44129a1d); // shsub z29.b, p6/m, z29.b, z16.b
+    // vl128 state = 0x19cebaa2
+    __ dci(0x44129a19); // shsub z25.b, p6/m, z25.b, z16.b
+    // vl128 state = 0x0d62dca4
+    __ dci(0x44129249); // shsub z9.b, p4/m, z9.b, z18.b
+    // vl128 state = 0x327e81e3
+    __ dci(0x44129248); // shsub z8.b, p4/m, z8.b, z18.b
+    // vl128 state = 0x28ec9bf8
+    __ dci(0x44169269); // shsubr z9.b, p4/m, z9.b, z19.b
+    // vl128 state = 0x652ca8c9
+    __ dci(0x44168661); // shsubr z1.b, p1/m, z1.b, z19.b
+    // vl128 state = 0x46fcb15a
+    __ dci(0x44168420); // shsubr z0.b, p1/m, z0.b, z1.b
+    // vl128 state = 0x7151e02b
+    __ dci(0x44168428); // shsubr z8.b, p1/m, z8.b, z1.b
+    // vl128 state = 0x4c8921f6
+    __ dci(0x44148409); // srhadd z9.b, p1/m, z9.b, z0.b
+    // vl128 state = 0xd0d2fc1c
+    __ dci(0x44148641); // srhadd z1.b, p1/m, z1.b, z18.b
+    // vl128 state = 0xc821f381
+  }
+
+  // Hash the whole machine state and load the result into w0 for checking.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One golden hash per SVE vector length, indexed by the number of
+    // 128-bit lanes (VL128 ... VL2048).
+    uint32_t expected_hashes[] = {
+        0xc821f381,
+        0xc0ad3b7c,
+        0x4eb4ba1b,
+        0xdc8e061a,
+        0x64675a15,
+        0x923703bf,
+        0x6944c0db,
+        0x7ac89bae,
+        0x8fa4c45f,
+        0xf64c8b4c,
+        0x8ba751b7,
+        0x2fe8832e,
+        0xc6b8000d,
+        0x864ba0ff,
+        0xded22c04,
+        0x213cf65e,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Simulator golden-state test for the SVE2 shift-and-insert instructions
+// (SLI, SRI). The instruction stream and expected hashes are
+// machine-generated; do not edit the encodings or hash values by hand.
+TEST_SVE(sve2_sli_sri) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  // Seed all registers with a fixed pseudo-random machine state so the
+  // final hash is deterministic.
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x4509f07f); // sri z31.b, z3.b, #7
+    // vl128 state = 0x509a7a2d
+    __ dci(0x454bf07e); // sri z30.s, z3.s, #21
+    // vl128 state = 0xc973a4e8
+    __ dci(0x450bf17a); // sri z26.b, z11.b, #5
+    // vl128 state = 0xa9dcbcf5
+    __ dci(0x450ef17b); // sri z27.b, z11.b, #2
+    // vl128 state = 0xd56761c1
+    __ dci(0x458ef1f9); // sri z25.d, z15.d, #50
+    // vl128 state = 0xdd84a538
+    __ dci(0x459ff1fb); // sri z27.d, z15.d, #33
+    // vl128 state = 0x4e2dbf4a
+    __ dci(0x459ff5df); // sli z31.d, z14.d, #31
+    // vl128 state = 0x46d9563e
+    __ dci(0x45d7f5cf); // sli z15.d, z14.d, #55
+    // vl128 state = 0xf4fcf912
+    __ dci(0x4593f5ce); // sli z14.d, z14.d, #19
+    // vl128 state = 0xcef34d18
+    __ dci(0x4593f1fe); // sri z30.d, z15.d, #45
+    // vl128 state = 0x69509e94
+    __ dci(0x4581f1ff); // sri z31.d, z15.d, #63
+    // vl128 state = 0x09cd0cf7
+    __ dci(0x45c1f1bd); // sri z29.d, z13.d, #31
+    // vl128 state = 0xfc095f8b
+    __ dci(0x45c1f03c); // sri z28.d, z1.d, #31
+    // vl128 state = 0x0ca836f0
+    __ dci(0x45c1f4b4); // sli z20.d, z5.d, #33
+    // vl128 state = 0x678be6b3
+    __ dci(0x45c1f5f0); // sli z16.d, z15.d, #33
+    // vl128 state = 0x7a743b56
+    __ dci(0x45c7f5f2); // sli z18.d, z15.d, #39
+    // vl128 state = 0x0bbc4117
+    __ dci(0x45c7f5e2); // sli z2.d, z15.d, #39
+    // vl128 state = 0x13e1a7ae
+    __ dci(0x45c7f1a0); // sri z0.d, z13.d, #25
+    // vl128 state = 0x8014a497
+    __ dci(0x4597f1b0); // sri z16.d, z13.d, #41
+    // vl128 state = 0x5f7994a8
+    __ dci(0x4593f5b1); // sli z17.d, z13.d, #19
+    // vl128 state = 0x125f37b5
+    __ dci(0x4591f5f0); // sli z16.d, z15.d, #17
+    // vl128 state = 0x26f1fdf2
+    __ dci(0x4581f5d2); // sli z18.d, z14.d, #1
+    // vl128 state = 0x5b0baccc
+    __ dci(0x4541f5d6); // sli z22.s, z14.s, #1
+    // vl128 state = 0x74f04ecb
+    __ dci(0x4551f1d4); // sri z20.s, z14.s, #15
+    // vl128 state = 0xc43d0586
+    __ dci(0x4553f150); // sri z16.s, z10.s, #13
+    // vl128 state = 0xce8c688a
+    __ dci(0x4557f171); // sri z17.s, z11.s, #9
+    // vl128 state = 0x03a5b3b0
+    __ dci(0x4513f175); // sri z21.h, z11.h, #13
+    // vl128 state = 0x392ab48e
+    __ dci(0x4551f177); // sri z23.s, z11.s, #15
+    // vl128 state = 0xa886dbc8
+    __ dci(0x4551f17f); // sri z31.s, z11.s, #15
+    // vl128 state = 0x37c804bc
+    __ dci(0x4551f16f); // sri z15.s, z11.s, #15
+    // vl128 state = 0x17e99d67
+    __ dci(0x4550f067); // sri z7.s, z3.s, #16
+    // vl128 state = 0xb0bd981a
+    __ dci(0x4550f077); // sri z23.s, z3.s, #16
+    // vl128 state = 0x5f643b3e
+    __ dci(0x4551f0f5); // sri z21.s, z7.s, #15
+    // vl128 state = 0xa0b83a32
+    __ dci(0x4551f09d); // sri z29.s, z4.s, #15
+    // vl128 state = 0x890807a1
+    __ dci(0x4552f08d); // sri z13.s, z4.s, #14
+    // vl128 state = 0x81cb8fa4
+    __ dci(0x4512f01d); // sri z29.h, z0.h, #14
+    // vl128 state = 0x62751a54
+    __ dci(0x4552f419); // sli z25.s, z0.s, #18
+    // vl128 state = 0xfd7c0337
+    __ dci(0x4542f49b); // sli z27.s, z4.s, #2
+    // vl128 state = 0x0089e534
+    __ dci(0x454af09a); // sri z26.s, z4.s, #22
+    // vl128 state = 0xea87d159
+    __ dci(0x45caf0d8); // sri z24.d, z6.d, #22
+    // vl128 state = 0x3c44b845
+    __ dci(0x45c2f2dc); // sri z28.d, z22.d, #30
+    // vl128 state = 0x9b8c17a7
+    __ dci(0x45caf25d); // sri z29.d, z18.d, #22
+    // vl128 state = 0x3e2c1797
+    __ dci(0x45caf0dc); // sri z28.d, z6.d, #22
+    // vl128 state = 0xbf933754
+    __ dci(0x458af1cc); // sri z12.d, z14.d, #54
+    // vl128 state = 0x93e91a23
+    __ dci(0x4586f1cd); // sri z13.d, z14.d, #58
+    // vl128 state = 0x0f7c6faa
+    __ dci(0x458ef0cc); // sri z12.d, z6.d, #50
+    // vl128 state = 0x1d771f71
+    __ dci(0x458ef00d); // sri z13.d, z0.d, #50
+    // vl128 state = 0x29a23da7
+    __ dci(0x450ef05d); // sri z29.b, z2.b, #2
+    // vl128 state = 0x74fd2038
+    __ dci(0x450cf00d); // sri z13.b, z0.b, #4
+    // vl128 state = 0x075bc166
+    __ dci(0x450cf00c); // sri z12.b, z0.b, #4
+    // vl128 state = 0xfd3d290f
+  }
+
+  // Hash the whole machine state and load the result into w0 for checking.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One golden hash per SVE vector length, indexed by the number of
+    // 128-bit lanes (VL128 ... VL2048).
+    uint32_t expected_hashes[] = {
+        0xfd3d290f,
+        0x8dd0bdab,
+        0xa25ba843,
+        0x484543ed,
+        0x22df2f4f,
+        0xb62769dc,
+        0x795e30f7,
+        0xe49948e7,
+        0xd4ceb676,
+        0xbf2d359a,
+        0xcf4331a9,
+        0x8cce4eef,
+        0x4fbaec97,
+        0x4fec4d88,
+        0x3efc521d,
+        0xffef31d1,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Exercise SVE2 SRSHR/URSHR (signed/unsigned rounding shift right by
+// immediate, merging predication) with a fixed pseudo-random instruction
+// stream. Correctness is checked by hashing the whole machine state and
+// comparing against golden values recorded per vector length; the inline
+// "vl128 state" comments give the running hash after each instruction at
+// VL128, to help bisect a mismatch.
+TEST_SVE(sve2_srshr_urshr) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Raw encodings via dci(); the stream must stay byte-identical or the
+    // expected hashes below become invalid.
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x04cc9074);  // srshr z20.d, p4/m, z20.d, #29
+    // vl128 state = 0xecefbcaa
+    __ dci(0x04cc9236);  // srshr z22.d, p4/m, z22.d, #15
+    // vl128 state = 0x7eef75c3
+    __ dci(0x04cd927e);  // urshr z30.d, p4/m, z30.d, #13
+    // vl128 state = 0xf5ab0a43
+    __ dci(0x04cd9e76);  // urshr z22.d, p7/m, z22.d, #13
+    // vl128 state = 0x67a9d15a
+    __ dci(0x04cd9a57);  // urshr z23.d, p6/m, z23.d, #14
+    // vl128 state = 0xf1591f3f
+    __ dci(0x044d9247);  // urshr z7.s, p4/m, z7.s, #14
+    // vl128 state = 0xcb770d03
+    __ dci(0x044d9245);  // urshr z5.s, p4/m, z5.s, #14
+    // vl128 state = 0x7a225c92
+    __ dci(0x044d9241);  // urshr z1.s, p4/m, z1.s, #14
+    // vl128 state = 0x31e4f59a
+    __ dci(0x044d8200);  // urshr z0.s, p0/m, z0.s, #16
+    // vl128 state = 0x7c0c67fa
+    __ dci(0x044d8330);  // urshr z16.s, p0/m, z16.s, #7
+    // vl128 state = 0x2aaa996d
+    __ dci(0x044d8340);  // urshr z0.s, p0/m, z0.s, #6
+    // vl128 state = 0x1999a541
+    __ dci(0x044d8104);  // urshr z4.s, p0/m, z4.s, #24
+    // vl128 state = 0xbebc22f3
+    __ dci(0x044d8526);  // urshr z6.s, p1/m, z6.s, #23
+    // vl128 state = 0x5e9c818d
+    __ dci(0x04cd8502);  // urshr z2.d, p1/m, z2.d, #24
+    // vl128 state = 0x9cd88e00
+    __ dci(0x048d9506);  // urshr z6.d, p5/m, z6.d, #56
+    // vl128 state = 0xff60a16e
+    __ dci(0x048d9504);  // urshr z4.d, p5/m, z4.d, #56
+    // vl128 state = 0xfae64bf4
+    __ dci(0x048d8705);  // urshr z5.d, p1/m, z5.d, #40
+    // vl128 state = 0xbd7bc8bb
+    __ dci(0x048d9307);  // urshr z7.d, p4/m, z7.d, #40
+    // vl128 state = 0x22e58729
+    __ dci(0x048c9323);  // srshr z3.d, p4/m, z3.d, #39
+    // vl128 state = 0x1a2b90d1
+    __ dci(0x048c8721);  // srshr z1.d, p1/m, z1.d, #39
+    // vl128 state = 0xf31798ea
+    __ dci(0x04cc8f20);  // srshr z0.d, p3/m, z0.d, #7
+    // vl128 state = 0x3a159e41
+    __ dci(0x04cc87b0);  // srshr z16.d, p1/m, z16.d, #3
+    // vl128 state = 0x461819c6
+    __ dci(0x04cc8778);  // srshr z24.d, p1/m, z24.d, #5
+    // vl128 state = 0x52c8c945
+    __ dci(0x048c8730);  // srshr z16.d, p1/m, z16.d, #39
+    // vl128 state = 0xa6724c16
+    __ dci(0x040c8534);  // srshr z20.b, p1/m, z20.b, #7
+    // vl128 state = 0xfeae5ea1
+    __ dci(0x040c957c);  // srshr z28.b, p5/m, z28.b, #5
+    // vl128 state = 0xe55cac9f
+    __ dci(0x048c9554);  // srshr z20.d, p5/m, z20.d, #54
+    // vl128 state = 0x41ccbe50
+    __ dci(0x048c8156);  // srshr z22.d, p0/m, z22.d, #54
+    // vl128 state = 0xfef5c71e
+    __ dci(0x040c8957);  // srshr z23.b, p2/m, z23.b, #6
+    // vl128 state = 0xac8cf177
+    __ dci(0x040c8bd5);  // srshr z21.h, p2/m, z21.h, #2
+    // vl128 state = 0xfe7005fe
+    __ dci(0x040c8354);  // srshr z20.h, p0/m, z20.h, #6
+    // vl128 state = 0x1daa6598
+    __ dci(0x040c931c);  // srshr z28.h, p4/m, z28.h, #8
+    // vl128 state = 0x8c7f2675
+    __ dci(0x040c9798);  // srshr z24.h, p5/m, z24.h, #4
+    // vl128 state = 0x2349e927
+    __ dci(0x044c97ba);  // srshr z26.s, p5/m, z26.s, #3
+    // vl128 state = 0xf3670053
+    __ dci(0x040c9faa);  // srshr z10.h, p7/m, z10.h, #3
+    // vl128 state = 0x61333578
+    __ dci(0x044d9fae);  // urshr z14.s, p7/m, z14.s, #3
+    // vl128 state = 0xdb1232a3
+    __ dci(0x044d8f8f);  // urshr z15.s, p3/m, z15.s, #4
+    // vl128 state = 0xb1b4bda1
+    __ dci(0x044d8f87);  // urshr z7.s, p3/m, z7.s, #4
+    // vl128 state = 0xba636ab8
+    __ dci(0x044d9d97);  // urshr z23.s, p7/m, z23.s, #20
+    // vl128 state = 0x8ab01b49
+    __ dci(0x040d9593);  // urshr z19.b, p5/m, z19.b, #4
+    // vl128 state = 0x20ee49b4
+    __ dci(0x040d959b);  // urshr z27.b, p5/m, z27.b, #4
+    // vl128 state = 0xe34dcf2e
+    __ dci(0x044c959a);  // srshr z26.s, p5/m, z26.s, #20
+    // vl128 state = 0x65bafb28
+    __ dci(0x044d9492);  // urshr z18.s, p5/m, z18.s, #28
+    // vl128 state = 0xcbed1382
+    __ dci(0x044c8493);  // srshr z19.s, p1/m, z19.s, #28
+    // vl128 state = 0xa54fb84c
+    __ dci(0x044c8cc3);  // srshr z3.s, p3/m, z3.s, #26
+    // vl128 state = 0x257267ee
+    __ dci(0x044c8c0b);  // srshr z11.s, p3/m, z11.s, #32
+    // vl128 state = 0xd494a3e8
+    __ dci(0x044c8c6f);  // srshr z15.s, p3/m, z15.s, #29
+    // vl128 state = 0x63621477
+    __ dci(0x044c9c2e);  // srshr z14.s, p7/m, z14.s, #31
+    // vl128 state = 0x4cb2e888
+    __ dci(0x04cc943e);  // srshr z30.d, p5/m, z30.d, #31
+    // vl128 state = 0x8e580ba2
+    __ dci(0x04cd953f);  // urshr z31.d, p5/m, z31.d, #23
+    // vl128 state = 0x7678cc05
+  }
+
+  // Hash the final machine state into `state`, then load the 32-bit result
+  // into w0 so the harness can compare it below.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // Golden hashes, indexed by (SVE vector length in quadwords) - 1, i.e.
+    // VL128 through VL2048.
+    uint32_t expected_hashes[] = {
+        0x7678cc05,
+        0x37f2893a,
+        0xce2a105d,
+        0x5a03f5a3,
+        0x81444dfc,
+        0x5581c0c1,
+        0xfee622cc,
+        0x0f6796a5,
+        0xf151a5fd,
+        0x13e9be9c,
+        0x9685f8b5,
+        0xa6827285,
+        0x7ad6d004,
+        0xba7989ae,
+        0x96fe2826,
+        0xd1ddc17e,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Exercise SVE2 SQSHL/UQSHL/SQSHLU (saturating left shift by immediate,
+// merging predication) with a fixed pseudo-random instruction stream.
+// Correctness is checked by hashing the whole machine state and comparing
+// against golden values recorded per vector length; the inline "vl128 state"
+// comments give the running hash after each instruction at VL128.
+TEST_SVE(sve2_sqshl_uqshl) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Raw encodings via dci(); the stream must stay byte-identical or the
+    // expected hashes below become invalid.
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x044f86aa);  // sqshlu z10.s, p1/m, z10.s, #21
+    // vl128 state = 0x37777991
+    __ dci(0x044f8482);  // sqshlu z2.s, p1/m, z2.s, #4
+    // vl128 state = 0x8119dd5a
+    __ dci(0x048f8480);  // sqshlu z0.d, p1/m, z0.d, #4
+    // vl128 state = 0x8966cd23
+    __ dci(0x04cf8c82);  // sqshlu z2.d, p3/m, z2.d, #36
+    // vl128 state = 0x71b53135
+    __ dci(0x044f8892);  // sqshlu z18.s, p2/m, z18.s, #4
+    // vl128 state = 0x44e0e9a7
+    __ dci(0x04cf8996);  // sqshlu z22.d, p2/m, z22.d, #44
+    // vl128 state = 0x4e4b77b9
+    __ dci(0x04cf9194);  // sqshlu z20.d, p4/m, z20.d, #44
+    // vl128 state = 0x66d72728
+    __ dci(0x04cf9b9c);  // sqshlu z28.d, p6/m, z28.d, #60
+    // vl128 state = 0xa80f62ce
+    __ dci(0x04c79f8c);  // uqshl z12.d, p7/m, z12.d, #60
+    // vl128 state = 0x87a3a8c0
+    __ dci(0x04469f88);  // sqshl z8.s, p7/m, z8.s, #28
+    // vl128 state = 0x3db302cb
+    __ dci(0x04469f8a);  // sqshl z10.s, p7/m, z10.s, #28
+    // vl128 state = 0x2d66bbb2
+    __ dci(0x04469a8e);  // sqshl z14.s, p6/m, z14.s, #20
+    // vl128 state = 0x39524732
+    __ dci(0x04c69a1e);  // sqshl z30.d, p6/m, z30.d, #48
+    // vl128 state = 0x39d71433
+    __ dci(0x04c68a9a);  // sqshl z26.d, p2/m, z26.d, #52
+    // vl128 state = 0x58771cfb
+    __ dci(0x04469a8a);  // sqshl z10.s, p6/m, z10.s, #20
+    // vl128 state = 0xa773fcc9
+    __ dci(0x04c68a88);  // sqshl z8.d, p2/m, z8.d, #52
+    // vl128 state = 0x9dce801c
+    __ dci(0x04469a89);  // sqshl z9.s, p6/m, z9.s, #20
+    // vl128 state = 0x4141302f
+    __ dci(0x04479b81);  // uqshl z1.s, p6/m, z1.s, #28
+    // vl128 state = 0x369084f9
+    __ dci(0x044f9f91);  // sqshlu z17.s, p7/m, z17.s, #28
+    // vl128 state = 0x1570bb90
+    __ dci(0x04479e90);  // uqshl z16.s, p7/m, z16.s, #20
+    // vl128 state = 0x27765662
+    __ dci(0x044f9f94);  // sqshlu z20.s, p7/m, z20.s, #28
+    // vl128 state = 0xe99bcbb9
+    __ dci(0x04479795);  // uqshl z21.s, p5/m, z21.s, #28
+    // vl128 state = 0xb36c3b9f
+    __ dci(0x04479754);  // uqshl z20.s, p5/m, z20.s, #26
+    // vl128 state = 0x435e0256
+    __ dci(0x04479750);  // uqshl z16.s, p5/m, z16.s, #26
+    // vl128 state = 0x485471e9
+    __ dci(0x04479740);  // uqshl z0.s, p5/m, z0.s, #26
+    // vl128 state = 0x170e10cb
+    __ dci(0x04079544);  // uqshl z4.b, p5/m, z4.b, #2
+    // vl128 state = 0x026fe32a
+    __ dci(0x04c79546);  // uqshl z6.d, p5/m, z6.d, #42
+    // vl128 state = 0x9a92b063
+    __ dci(0x04c78504);  // uqshl z4.d, p1/m, z4.d, #40
+    // vl128 state = 0x4e9a105e
+    __ dci(0x04879500);  // uqshl z0.d, p5/m, z0.d, #8
+    // vl128 state = 0x958b4d28
+    __ dci(0x04879908);  // uqshl z8.d, p6/m, z8.d, #8
+    // vl128 state = 0x420ff82d
+    __ dci(0x04879318);  // uqshl z24.d, p4/m, z24.d, #24
+    // vl128 state = 0x88002097
+    __ dci(0x0487931a);  // uqshl z26.d, p4/m, z26.d, #24
+    // vl128 state = 0x3047401c
+    __ dci(0x0486938a);  // sqshl z10.d, p4/m, z10.d, #28
+    // vl128 state = 0x5b2b7938
+    __ dci(0x04069188);  // sqshl z8.b, p4/m, z8.b, #4
+    // vl128 state = 0xb92dd260
+    __ dci(0x04469389);  // sqshl z9.s, p4/m, z9.s, #28
+    // vl128 state = 0xdc6370c3
+    __ dci(0x0447918b);  // uqshl z11.s, p4/m, z11.s, #12
+    // vl128 state = 0x5e6198f0
+    __ dci(0x0447913b);  // uqshl z27.s, p4/m, z27.s, #9
+    // vl128 state = 0x935ed2a3
+    __ dci(0x0447915f);  // uqshl z31.s, p4/m, z31.s, #10
+    // vl128 state = 0x76271654
+    __ dci(0x0406915d);  // sqshl z29.b, p4/m, z29.b, #2
+    // vl128 state = 0x46a71ae3
+    __ dci(0x0486911f);  // sqshl z31.d, p4/m, z31.d, #8
+    // vl128 state = 0x2c7320a6
+    __ dci(0x0486911d);  // sqshl z29.d, p4/m, z29.d, #8
+    // vl128 state = 0x4aa0022d
+    __ dci(0x04869b1f);  // sqshl z31.d, p6/m, z31.d, #24
+    // vl128 state = 0x2de081d7
+    __ dci(0x04069317);  // sqshl z23.h, p4/m, z23.h, #8
+    // vl128 state = 0x879c9ead
+    __ dci(0x0447931f);  // uqshl z31.s, p4/m, z31.s, #24
+    // vl128 state = 0x51070552
+    __ dci(0x04479b9e);  // uqshl z30.s, p6/m, z30.s, #28
+    // vl128 state = 0x8cc26b2b
+    __ dci(0x04479adf);  // uqshl z31.s, p6/m, z31.s, #22
+    // vl128 state = 0x8f4512d3
+    __ dci(0x04479adb);  // uqshl z27.s, p6/m, z27.s, #22
+    // vl128 state = 0x3d44e050
+    __ dci(0x04079a99);  // uqshl z25.h, p6/m, z25.h, #4
+    // vl128 state = 0xede0c288
+    __ dci(0x04079a89);  // uqshl z9.h, p6/m, z9.h, #4
+    // vl128 state = 0x928beed6
+    __ dci(0x04879acb);  // uqshl z11.d, p6/m, z11.d, #22
+    // vl128 state = 0x6945e18a
+  }
+
+  // Hash the final machine state into `state`, then load the 32-bit result
+  // into w0 so the harness can compare it below.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // Golden hashes, indexed by (SVE vector length in quadwords) - 1, i.e.
+    // VL128 through VL2048.
+    uint32_t expected_hashes[] = {
+        0x6945e18a,
+        0x0e954f70,
+        0x3d269eb2,
+        0xefeb5acb,
+        0xfb27cb0c,
+        0x651a1aea,
+        0x07011083,
+        0xd425418b,
+        0xa0e026c6,
+        0x407c416e,
+        0x14e25761,
+        0x21eef576,
+        0xc6ad09eb,
+        0x3642006b,
+        0xdebec165,
+        0x24ae8a32,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Exercise SVE2 unsigned saturating/rounding shifts by vector
+// (UQSHL/UQRSHL/URSHL and their reversed-operand *R forms, plus some
+// immediate-form UQSHL encodings) with a fixed pseudo-random instruction
+// stream. Correctness is checked by hashing the whole machine state and
+// comparing against golden values recorded per vector length; the inline
+// "vl128 state" comments give the running hash after each instruction at
+// VL128.
+TEST_SVE(sve2_unsigned_sat_round_shift) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Raw encodings via dci(); the stream must stay byte-identical or the
+    // expected hashes below become invalid.
+    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
+    __ dci(0x44cb84cb);  // uqrshl z11.d, p1/m, z11.d, z6.d
+    // vl128 state = 0x9794ef4a
+    __ dci(0x444b85db);  // uqrshl z27.h, p1/m, z27.h, z14.h
+    // vl128 state = 0xda137fcc
+    __ dci(0x444b874b);  // uqrshl z11.h, p1/m, z11.h, z26.h
+    // vl128 state = 0xafc1533b
+    __ dci(0x444b87fb);  // uqrshl z27.h, p1/m, z27.h, z31.h
+    // vl128 state = 0x228890a2
+    __ dci(0x444b87f3);  // uqrshl z19.h, p1/m, z19.h, z31.h
+    // vl128 state = 0x5cb0d356
+    __ dci(0x444385f1);  // urshl z17.h, p1/m, z17.h, z15.h
+    // vl128 state = 0xbb6b6d1d
+    __ dci(0x444795f3);  // urshlr z19.h, p5/m, z19.h, z15.h
+    // vl128 state = 0x98b43358
+    __ dci(0x44479552);  // urshlr z18.h, p5/m, z18.h, z10.h
+    // vl128 state = 0x472880b2
+    __ dci(0x44c79502);  // urshlr z2.d, p5/m, z2.d, z8.d
+    // vl128 state = 0x0995d86f
+    __ dci(0x44879406);  // urshlr z6.s, p5/m, z6.s, z0.s
+    // vl128 state = 0x405211cd
+    __ dci(0x44079436);  // urshlr z22.b, p5/m, z22.b, z1.b
+    // vl128 state = 0x563647b0
+    __ dci(0x44078c34);  // urshlr z20.b, p3/m, z20.b, z1.b
+    // vl128 state = 0x2eacf2d3
+    __ dci(0x440f843c);  // uqrshlr z28.b, p1/m, z28.b, z1.b
+    // vl128 state = 0x56f472ce
+    __ dci(0x440f8cbe);  // uqrshlr z30.b, p3/m, z30.b, z5.b
+    // vl128 state = 0x910ce8d0
+    __ dci(0x44078eba);  // urshlr z26.b, p3/m, z26.b, z21.b
+    // vl128 state = 0xc47b6482
+    __ dci(0x44078ebe);  // urshlr z30.b, p3/m, z30.b, z21.b
+    // vl128 state = 0xff805975
+    __ dci(0x440f86b6);  // uqrshlr z22.b, p1/m, z22.b, z21.b
+    // vl128 state = 0x132fe792
+    __ dci(0x444b86b7);  // uqrshl z23.h, p1/m, z23.h, z21.h
+    // vl128 state = 0xabd3d85c
+    __ dci(0x440b84a7);  // uqrshl z7.b, p1/m, z7.b, z5.b
+    // vl128 state = 0x8f718992
+    __ dci(0x440b8085);  // uqrshl z5.b, p0/m, z5.b, z4.b
+    // vl128 state = 0x1b05e694
+    __ dci(0x440b8687);  // uqrshl z7.b, p1/m, z7.b, z20.b
+    // vl128 state = 0xd9a0c225
+    __ dci(0x440986cf);  // uqshl z15.b, p1/m, z15.b, z22.b
+    // vl128 state = 0x98be170a
+    __ dci(0x440b87ce);  // uqrshl z14.b, p1/m, z14.b, z30.b
+    // vl128 state = 0x0993d862
+    __ dci(0x440b838c);  // uqrshl z12.b, p0/m, z12.b, z28.b
+    // vl128 state = 0xbc95a037
+    __ dci(0x440b839c);  // uqrshl z28.b, p0/m, z28.b, z28.b
+    // vl128 state = 0x558159d9
+    __ dci(0x444b8314);  // uqrshl z20.h, p0/m, z20.h, z24.h
+    // vl128 state = 0x53798c6b
+    __ dci(0x44498b1c);  // uqshl z28.h, p2/m, z28.h, z24.h
+    // vl128 state = 0x83db6a7c
+    __ dci(0x44498b0c);  // uqshl z12.h, p2/m, z12.h, z24.h
+    // vl128 state = 0x62bda6cb
+    __ dci(0x44438b0e);  // urshl z14.h, p2/m, z14.h, z24.h
+    // vl128 state = 0xc04356eb
+    __ dci(0x44438986);  // urshl z6.h, p2/m, z6.h, z12.h
+    // vl128 state = 0x0e2e6682
+    __ dci(0x444389e4);  // urshl z4.h, p2/m, z4.h, z15.h
+    // vl128 state = 0xbb28cacd
+    __ dci(0x444391f4);  // urshl z20.h, p4/m, z20.h, z15.h
+    // vl128 state = 0x5349f37a
+    __ dci(0x444391f6);  // urshl z22.h, p4/m, z22.h, z15.h
+    // vl128 state = 0x99e66890
+    __ dci(0x44c39177);  // urshl z23.d, p4/m, z23.d, z11.d
+    // vl128 state = 0x2d48a891
+    __ dci(0x44c79573);  // urshlr z19.d, p5/m, z19.d, z11.d
+    // vl128 state = 0xd26e94f9
+    __ dci(0x04c79d63);  // uqshl z3.d, p7/m, z3.d, #43
+    // vl128 state = 0x54801050
+    __ dci(0x04c78c67);  // uqshl z7.d, p3/m, z7.d, #35
+    // vl128 state = 0xde9f357a
+    __ dci(0x04878c43);  // uqshl z3.d, p3/m, z3.d, #2
+    // vl128 state = 0x59e5d53c
+    __ dci(0x44878c0b);  // urshlr z11.s, p3/m, z11.s, z0.s
+    // vl128 state = 0x8cfa7532
+    __ dci(0x44878c03);  // urshlr z3.s, p3/m, z3.s, z0.s
+    // vl128 state = 0xdb4e86b6
+    __ dci(0x44878d42);  // urshlr z2.s, p3/m, z2.s, z10.s
+    // vl128 state = 0x07467a7c
+    __ dci(0x44878d4a);  // urshlr z10.s, p3/m, z10.s, z10.s
+    // vl128 state = 0x6a4ad81c
+    __ dci(0x44879948);  // urshlr z8.s, p6/m, z8.s, z10.s
+    // vl128 state = 0x91d7bdc0
+    __ dci(0x44879949);  // urshlr z9.s, p6/m, z9.s, z10.s
+    // vl128 state = 0x2fe3b819
+    __ dci(0x44879bcb);  // urshlr z11.s, p6/m, z11.s, z30.s
+    // vl128 state = 0x5c121b68
+    __ dci(0x04879b4f);  // uqshl z15.d, p6/m, z15.d, #26
+    // vl128 state = 0xe678f4f7
+    __ dci(0x44879bdf);  // urshlr z31.s, p6/m, z31.s, z30.s
+    // vl128 state = 0x6593da76
+    __ dci(0x4487935e);  // urshlr z30.s, p4/m, z30.s, z26.s
+    // vl128 state = 0xb558ba57
+    __ dci(0x440f9356);  // uqrshlr z22.b, p4/m, z22.b, z26.b
+    // vl128 state = 0x45d1775e
+    __ dci(0x440f93f7);  // uqrshlr z23.b, p4/m, z23.b, z31.b
+    // vl128 state = 0x20974795
+    __ dci(0x448793f5);  // urshlr z21.s, p4/m, z21.s, z31.s
+    // vl128 state = 0xeb0bc2ab
+    __ dci(0x448383fd);  // urshl z29.s, p0/m, z29.s, z31.s
+    // vl128 state = 0x74557d81
+    __ dci(0x448b82f9);  // uqrshl z25.s, p0/m, z25.s, z23.s
+    // vl128 state = 0x34518418
+    __ dci(0x448f82b8);  // uqrshlr z24.s, p0/m, z24.s, z21.s
+    // vl128 state = 0x93e637f3
+    __ dci(0x448f82bc);  // uqrshlr z28.s, p0/m, z28.s, z21.s
+    // vl128 state = 0x6e35e56a
+    __ dci(0x448f83fe);  // uqrshlr z30.s, p0/m, z30.s, z31.s
+    // vl128 state = 0xf3c59bb1
+    __ dci(0x448d83ae);  // uqshlr z14.s, p0/m, z14.s, z29.s
+    // vl128 state = 0x95b401a3
+    __ dci(0x448d83aa);  // uqshlr z10.s, p0/m, z10.s, z29.s
+    // vl128 state = 0x56ec65b0
+    __ dci(0x448993ae);  // uqshl z14.s, p4/m, z14.s, z29.s
+    // vl128 state = 0x28f6e4c6
+    __ dci(0x448993a6);  // uqshl z6.s, p4/m, z6.s, z29.s
+    // vl128 state = 0x9ed5eaf3
+    __ dci(0x44c991a4);  // uqshl z4.d, p4/m, z4.d, z13.d
+    // vl128 state = 0xa8512b00
+    __ dci(0x44c991a5);  // uqshl z5.d, p4/m, z5.d, z13.d
+    // vl128 state = 0x49a10780
+    __ dci(0x44c991a1);  // uqshl z1.d, p4/m, z1.d, z13.d
+    // vl128 state = 0x465a2cb4
+    __ dci(0x444b91a0);  // uqrshl z0.h, p4/m, z0.h, z13.h
+    // vl128 state = 0x8f6dad8e
+    __ dci(0x444b91a1);  // uqrshl z1.h, p4/m, z1.h, z13.h
+    // vl128 state = 0x50dec3f8
+    __ dci(0x440391a3);  // urshl z3.b, p4/m, z3.b, z13.b
+    // vl128 state = 0xab2b5ad7
+    __ dci(0x448393a7);  // urshl z7.s, p4/m, z7.s, z29.s
+    // vl128 state = 0x2ffd164f
+    __ dci(0x448393af);  // urshl z15.s, p4/m, z15.s, z29.s
+    // vl128 state = 0x43a7959b
+    __ dci(0x448393ab);  // urshl z11.s, p4/m, z11.s, z29.s
+    // vl128 state = 0xf9526723
+    __ dci(0x448f93af);  // uqrshlr z15.s, p4/m, z15.s, z29.s
+    // vl128 state = 0xf9081b27
+    __ dci(0x448f93ae);  // uqrshlr z14.s, p4/m, z14.s, z29.s
+    // vl128 state = 0x3a4f693e
+    __ dci(0x048793aa);  // uqshl z10.d, p4/m, z10.d, #29
+    // vl128 state = 0xbba37d9a
+    __ dci(0x04c79388);  // uqshl z8.d, p4/m, z8.d, #60
+    // vl128 state = 0x3b3f5fa4
+    __ dci(0x04c79380);  // uqshl z0.d, p4/m, z0.d, #60
+    // vl128 state = 0xdac48ac2
+    __ dci(0x04878390);  // uqshl z16.d, p0/m, z16.d, #28
+    // vl128 state = 0xe3c8148f
+    __ dci(0x44878794);  // urshlr z20.s, p1/m, z20.s, z28.s
+    // vl128 state = 0xee2179ec
+    __ dci(0x04878384);  // uqshl z4.d, p0/m, z4.d, #28
+    // vl128 state = 0xc6a3796c
+    __ dci(0x048787ac);  // uqshl z12.d, p1/m, z12.d, #29
+    // vl128 state = 0x18e0fd43
+    __ dci(0x04c786ae);  // uqshl z14.d, p1/m, z14.d, #53
+    // vl128 state = 0x9292503e
+    __ dci(0x04c786be);  // uqshl z30.d, p1/m, z30.d, #53
+    // vl128 state = 0xc1ebe042
+    __ dci(0x44c782b6);  // urshlr z22.d, p0/m, z22.d, z21.d
+    // vl128 state = 0x0badc025
+    __ dci(0x44c78a3e);  // urshlr z30.d, p2/m, z30.d, z17.d
+    // vl128 state = 0x51b3b5ac
+    __ dci(0x04c78b3a);  // uqshl z26.d, p2/m, z26.d, #57
+    // vl128 state = 0x334f52f8
+    __ dci(0x04c78832);  // uqshl z18.d, p2/m, z18.d, #33
+    // vl128 state = 0xf95df0b7
+    __ dci(0x44cf8833);  // uqrshlr z19.d, p2/m, z19.d, z1.d
+    // vl128 state = 0xda88a00a
+    __ dci(0x44cf9811);  // uqrshlr z17.d, p6/m, z17.d, z0.d
+    // vl128 state = 0x1e642a4c
+    __ dci(0x44cf9c41);  // uqrshlr z1.d, p7/m, z1.d, z2.d
+    // vl128 state = 0xeb7fe4bd
+    __ dci(0x444f8c45);  // uqrshlr z5.h, p3/m, z5.h, z2.h
+    // vl128 state = 0x5a82d833
+    __ dci(0x44cf844d);  // uqrshlr z13.d, p1/m, z13.d, z2.d
+    // vl128 state = 0x595d42a4
+    __ dci(0x44c7841d);  // urshlr z29.d, p1/m, z29.d, z0.d
+    // vl128 state = 0x0b433688
+    __ dci(0x44c7805f);  // urshlr z31.d, p0/m, z31.d, z2.d
+    // vl128 state = 0x14b8c29a
+    __ dci(0x44cf807b);  // uqrshlr z27.d, p0/m, z27.d, z3.d
+    // vl128 state = 0x12a76015
+    __ dci(0x44c780eb);  // urshlr z11.d, p0/m, z11.d, z7.d
+    // vl128 state = 0x73fa7d24
+    __ dci(0x44c794e3);  // urshlr z3.d, p5/m, z3.d, z7.d
+    // vl128 state = 0x0a01c859
+    __ dci(0x04c795eb);  // uqshl z11.d, p5/m, z11.d, #47
+    // vl128 state = 0x0e7024fd
+    __ dci(0x04c795e9);  // uqshl z9.d, p5/m, z9.d, #47
+    // vl128 state = 0x9ca5cb63
+    __ dci(0x04c795f9);  // uqshl z25.d, p5/m, z25.d, #47
+    // vl128 state = 0x4c60da07
+    __ dci(0x04c795fb);  // uqshl z27.d, p5/m, z27.d, #47
+    // vl128 state = 0x71114c19
+    __ dci(0x04c799f3);  // uqshl z19.d, p6/m, z19.d, #47
+    // vl128 state = 0x32d71e12
+    __ dci(0x04c79997);  // uqshl z23.d, p6/m, z23.d, #44
+    // vl128 state = 0xab0c9051
+  }
+
+  // Hash the final machine state into `state`, then load the 32-bit result
+  // into w0 so the harness can compare it below.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // Golden hashes, indexed by (SVE vector length in quadwords) - 1, i.e.
+    // VL128 through VL2048.
+    uint32_t expected_hashes[] = {
+        0xab0c9051,
+        0xc2455013,
+        0x6e4b3f1e,
+        0x631ce7ed,
+        0x031e4f7f,
+        0xa2be23bd,
+        0x2f5f74b0,
+        0x9e60f1ea,
+        0xb1080595,
+        0x953020c9,
+        0x7a5bfffb,
+        0xf0a27817,
+        0x83904886,
+        0x04620572,
+        0xbcd5c8c9,
+        0x3d4abe12,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Exercise SVE2 signed saturating/rounding shifts by vector
+// (SQSHL/SQRSHL/SRSHL and their reversed-operand *R forms, plus some
+// immediate-form SQSHL encodings) with a fixed pseudo-random instruction
+// stream. Correctness is checked by hashing the whole machine state and
+// comparing against golden values recorded per vector length; the inline
+// "vl128 state" comments give the running hash after each instruction at
+// VL128.
+TEST_SVE(sve2_signed_sat_round_shift) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Raw encodings via dci(); the stream must stay byte-identical or the
+    // expected hashes below become invalid.
+    ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
+    __ dci(0x048687c6);  // sqshl z6.d, p1/m, z6.d, #30
+    // vl128 state = 0xe81d8487
+    __ dci(0x048687c4);  // sqshl z4.d, p1/m, z4.d, #30
+    // vl128 state = 0x47cc69b1
+    __ dci(0x04868385);  // sqshl z5.d, p0/m, z5.d, #28
+    // vl128 state = 0xec4cab7b
+    __ dci(0x0486838d);  // sqshl z13.d, p0/m, z13.d, #28
+    // vl128 state = 0x23b07ac8
+    __ dci(0x048681a9);  // sqshl z9.d, p0/m, z9.d, #13
+    // vl128 state = 0xace4253d
+    __ dci(0x04068139);  // sqshl z25.b, p0/m, z25.b, #1
+    // vl128 state = 0xf8f14a80
+    __ dci(0x440681b8);  // srshlr z24.b, p0/m, z24.b, z13.b
+    // vl128 state = 0xa79d8fc1
+    __ dci(0x4406803a);  // srshlr z26.b, p0/m, z26.b, z1.b
+    // vl128 state = 0xed9bb777
+    __ dci(0x4406808a);  // srshlr z10.b, p0/m, z10.b, z4.b
+    // vl128 state = 0xbd1dfa2f
+    __ dci(0x440688da);  // srshlr z26.b, p2/m, z26.b, z6.b
+    // vl128 state = 0x8f9b61e6
+    __ dci(0x448680db);  // srshlr z27.s, p0/m, z27.s, z6.s
+    // vl128 state = 0x0a16f551
+    __ dci(0x440684d3);  // srshlr z19.b, p1/m, z19.b, z6.b
+    // vl128 state = 0x0a764f12
+    __ dci(0x448694c3);  // srshlr z3.s, p5/m, z3.s, z6.s
+    // vl128 state = 0x8d6f5613
+    __ dci(0x448e9cc7);  // sqrshlr z7.s, p7/m, z7.s, z6.s
+    // vl128 state = 0xaf7b559b
+    __ dci(0x448e9ef7);  // sqrshlr z23.s, p7/m, z23.s, z23.s
+    // vl128 state = 0x086d6430
+    __ dci(0x448e9673);  // sqrshlr z19.s, p5/m, z19.s, z19.s
+    // vl128 state = 0x4a9a5736
+    __ dci(0x448a8663);  // sqrshl z3.s, p1/m, z3.s, z19.s
+    // vl128 state = 0x19adf50e
+    __ dci(0x440a8e6b);  // sqrshl z11.b, p3/m, z11.b, z19.b
+    // vl128 state = 0x4a01719c
+    __ dci(0x44028eef);  // srshl z15.b, p3/m, z15.b, z23.b
+    // vl128 state = 0x1af6d72e
+    __ dci(0x44028e8b);  // srshl z11.b, p3/m, z11.b, z20.b
+    // vl128 state = 0xeca2061d
+    __ dci(0x44828f8f);  // srshl z15.s, p3/m, z15.s, z28.s
+    // vl128 state = 0x61059832
+    __ dci(0x44828f87);  // srshl z7.s, p3/m, z7.s, z28.s
+    // vl128 state = 0x5e4d94cc
+    __ dci(0x44828a97);  // srshl z23.s, p2/m, z23.s, z20.s
+    // vl128 state = 0xf5095aa8
+    __ dci(0x44828a93);  // srshl z19.s, p2/m, z19.s, z20.s
+    // vl128 state = 0x155ff234
+    __ dci(0x44868a11);  // srshlr z17.s, p2/m, z17.s, z16.s
+    // vl128 state = 0xf2844c7f
+    __ dci(0x44c68a90);  // srshlr z16.d, p2/m, z16.d, z20.d
+    // vl128 state = 0xcf9f9508
+    __ dci(0x44c68a80);  // srshlr z0.d, p2/m, z0.d, z20.d
+    // vl128 state = 0xd476915b
+    __ dci(0x44868a02);  // srshlr z2.s, p2/m, z2.s, z16.s
+    // vl128 state = 0x9acbc986
+    __ dci(0x44868a12);  // srshlr z18.s, p2/m, z18.s, z16.s
+    // vl128 state = 0xaf9e1114
+    __ dci(0x4486921a);  // srshlr z26.s, p4/m, z26.s, z16.s
+    // vl128 state = 0x9d188add
+    __ dci(0x4486909e);  // srshlr z30.s, p4/m, z30.s, z4.s
+    // vl128 state = 0xb41018d5
+    __ dci(0x448c9096);  // sqshlr z22.s, p4/m, z22.s, z4.s
+    // vl128 state = 0x4ab51dea
+    __ dci(0x448890b4);  // sqshl z20.s, p4/m, z20.s, z5.s
+    // vl128 state = 0x600dcc36
+    __ dci(0x448884bc);  // sqshl z28.s, p1/m, z28.s, z5.s
+    // vl128 state = 0x84f37050
+    __ dci(0x44c88434);  // sqshl z20.d, p1/m, z20.d, z1.d
+    // vl128 state = 0x1f19ce5a
+    __ dci(0x44cc8536);  // sqshlr z22.d, p1/m, z22.d, z9.d
+    // vl128 state = 0xa51d3f31
+    __ dci(0x448c8517);  // sqshlr z23.s, p1/m, z23.s, z8.s
+    // vl128 state = 0x8d431292
+    __ dci(0x448c8133);  // sqshlr z19.s, p0/m, z19.s, z9.s
+    // vl128 state = 0xdd59917f
+    __ dci(0x448c8b23);  // sqshlr z3.s, p2/m, z3.s, z25.s
+    // vl128 state = 0xfcdae7d4
+    __ dci(0x448c8b21);  // sqshlr z1.s, p2/m, z1.s, z25.s
+    // vl128 state = 0x0f1239a5
+    __ dci(0x448c8b29);  // sqshlr z9.s, p2/m, z9.s, z25.s
+    // vl128 state = 0xf6d1f180
+    __ dci(0x448c8b2b);  // sqshlr z11.s, p2/m, z11.s, z25.s
+    // vl128 state = 0xe7a1af08
+    __ dci(0x448c8b89);  // sqshlr z9.s, p2/m, z9.s, z28.s
+    // vl128 state = 0xa72666cb
+    __ dci(0x448c9bcb);  // sqshlr z11.s, p6/m, z11.s, z30.s
+    // vl128 state = 0x9cae5fd7
+    __ dci(0x44869bca);  // srshlr z10.s, p6/m, z10.s, z30.s
+    // vl128 state = 0xda133b76
+    __ dci(0x04869b8e);  // sqshl z14.d, p6/m, z14.d, #28
+    // vl128 state = 0xf8eb71c2
+    __ dci(0x44869bca);  // srshlr z10.s, p6/m, z10.s, z30.s
+    // vl128 state = 0xbe561563
+    __ dci(0x44869ae2);  // srshlr z2.s, p6/m, z2.s, z23.s
+    // vl128 state = 0x0c286f7e
+    __ dci(0x44869a46);  // srshlr z6.s, p6/m, z6.s, z18.s
+    // vl128 state = 0x59da6464
+    __ dci(0x44869a47);  // srshlr z7.s, p6/m, z7.s, z18.s
+    // vl128 state = 0x908e5664
+    __ dci(0x4486920f);  // srshlr z15.s, p4/m, z15.s, z16.s
+    // vl128 state = 0x213d23db
+    __ dci(0x44869a87);  // srshlr z7.s, p6/m, z7.s, z20.s
+    // vl128 state = 0xd81ea7fb
+    __ dci(0x44469a86);  // srshlr z6.h, p6/m, z6.h, z20.h
+    // vl128 state = 0x27d44726
+    __ dci(0x44029a82);  // srshl z2.b, p6/m, z2.b, z20.b
+    // vl128 state = 0x2187127f
+    __ dci(0x44069aa0);  // srshlr z0.b, p6/m, z0.b, z21.b
+    // vl128 state = 0x68ba9323
+    __ dci(0x444692b0);  // srshlr z16.h, p4/m, z16.h, z21.h
+    // vl128 state = 0x148619ff
+    __ dci(0x44468ab2);  // srshlr z18.h, p2/m, z18.h, z21.h
+    // vl128 state = 0xae93eae6
+    __ dci(0x444698b6);  // srshlr z22.h, p6/m, z22.h, z5.h
+    // vl128 state = 0x0b875035
+    __ dci(0x44469934);  // srshlr z20.h, p6/m, z20.h, z9.h
+    // vl128 state = 0x559132ed
+    __ dci(0x0406993c);  // sqshl z28.b, p6/m, z28.b, #1
+    // vl128 state = 0xec1782e4
+    __ dci(0x4406912c);  // srshlr z12.b, p4/m, z12.b, z9.b
+    // vl128 state = 0x089d32a4
+    __ dci(0x440291ae);  // srshl z14.b, p4/m, z14.b, z13.b
+    // vl128 state = 0xde257893
+    __ dci(0x44829126);  // srshl z6.s, p4/m, z6.s, z9.s
+    // vl128 state = 0x318d27ef
+    __ dci(0x448a8127);  // sqrshl z7.s, p0/m, z7.s, z9.s
+    // vl128 state = 0x1bc564fc
+    __ dci(0x448e8165);  // sqrshlr z5.s, p0/m, z5.s, z11.s
+    // vl128 state = 0xa5e5c696
+    __ dci(0x44869161);  // srshlr z1.s, p4/m, z1.s, z11.s
+    // vl128 state = 0xd64b6830
+    __ dci(0x44829120);  // srshl z0.s, p4/m, z0.s, z9.s
+    // vl128 state = 0x107ca84d
+    __ dci(0x44829124);  // srshl z4.s, p4/m, z4.s, z9.s
+    // vl128 state = 0xcd5688f3
+    __ dci(0x4482912c);  // srshl z12.s, p4/m, z12.s, z9.s
+    // vl128 state = 0x88dee210
+    __ dci(0x44829128);  // srshl z8.s, p4/m, z8.s, z9.s
+    // vl128 state = 0xfe8611fa
+    __ dci(0x44c69120);  // srshlr z0.d, p4/m, z0.d, z9.d
+    // vl128 state = 0xe8b8cabd
+    __ dci(0x44ce9168);  // sqrshlr z8.d, p4/m, z8.d, z11.d
+    // vl128 state = 0x269af804
+    __ dci(0x448e9069);  // sqrshlr z9.s, p4/m, z9.s, z3.s
+    // vl128 state = 0x7d425704
+    __ dci(0x448e8461);  // sqrshlr z1.s, p1/m, z1.s, z3.s
+    // vl128 state = 0x1577bd67
+    __ dci(0x448e8460);  // sqrshlr z0.s, p1/m, z0.s, z3.s
+    // vl128 state = 0x6966617f
+    __ dci(0x448a8428);  // sqrshl z8.s, p1/m, z8.s, z1.s
+    // vl128 state = 0x6c9cc508
+    __ dci(0x44ca8409);  // sqrshl z9.d, p1/m, z9.d, z0.d
+    // vl128 state = 0xb3ea2e65
+    __ dci(0x44c68408);  // srshlr z8.d, p1/m, z8.d, z0.d
+    // vl128 state = 0x1aef7620
+    __ dci(0x44c6840a);  // srshlr z10.d, p1/m, z10.d, z0.d
+    // vl128 state = 0x63f2c5a3
+    __ dci(0x44cc840e);  // sqshlr z14.d, p1/m, z14.d, z0.d
+    // vl128 state = 0xb54a8f94
+    __ dci(0x44cc8e1e);  // sqshlr z30.d, p3/m, z30.d, z16.d
+    // vl128 state = 0xe247e0a3
+    __ dci(0x44c68e1a);  // srshlr z26.d, p3/m, z26.d, z16.d
+    // vl128 state = 0xfb8bf060
+    __ dci(0x44c28a0a);  // srshl z10.d, p2/m, z10.d, z16.d
+    // vl128 state = 0x829643e3
+    __ dci(0x44c68e0e);  // srshlr z14.d, p3/m, z14.d, z16.d
+    // vl128 state = 0x8bd62d7b
+    __ dci(0x44c6881e);  // srshlr z30.d, p2/m, z30.d, z0.d
+    // vl128 state = 0x4d8caca2
+    __ dci(0x44869816);  // srshlr z22.s, p6/m, z22.s, z0.s
+    // vl128 state = 0x027f41ac
+    __ dci(0x44029817);  // srshl z23.b, p6/m, z23.b, z0.b
+    // vl128 state = 0xab9c9627
+    __ dci(0x4402993f);  // srshl z31.b, p6/m, z31.b, z9.b
+    // vl128 state = 0x42a71056
+    __ dci(0x4406991e);  // srshlr z30.b, p6/m, z30.b, z8.b
+    // vl128 state = 0xdcdf1396
+    __ dci(0x44068d1f);  // srshlr z31.b, p3/m, z31.b, z8.b
+    // vl128 state = 0x84fa5cac
+    __ dci(0x44068d1d);  // srshlr z29.b, p3/m, z29.b, z8.b
+    // vl128 state = 0x1239cdae
+    __ dci(0x44468d2d);  // srshlr z13.h, p3/m, z13.h, z9.h
+    // vl128 state = 0xae689b2f
+    __ dci(0x4446850f);  // srshlr z15.h, p1/m, z15.h, z8.h
+    // vl128 state = 0x6330c9c2
+    __ dci(0x4446910e);  // srshlr z14.h, p4/m, z14.h, z8.h
+    // vl128 state = 0x326ffb9f
+    __ dci(0x4446940f);  // srshlr z15.h, p5/m, z15.h, z0.h
+    // vl128 state = 0x3f48f466
+    __ dci(0x44468487);  // srshlr z7.h, p1/m, z7.h, z4.h
+    // vl128 state = 0x0d3b6c65
+    __ dci(0x444694b7);  // srshlr z23.h, p5/m, z23.h, z5.h
+    // vl128 state = 0x5ef21cd8
+    __ dci(0x44469c93);  // srshlr z19.h, p7/m, z19.h, z4.h
+    // vl128 state = 0x413d5573
+    __ dci(0x44069e92);  // srshlr z18.b, p7/m, z18.b, z20.b
+    // vl128 state = 0xac59d0c3
+    __ dci(0x44469693);  // srshlr z19.h, p5/m, z19.h, z20.h
+    // vl128 state = 0xb3969968
+  }
+
+  // Hash the final machine state into `state`, then load the 32-bit result
+  // into w0 so the harness can compare it below.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // Golden hashes, indexed by (SVE vector length in quadwords) - 1, i.e.
+    // VL128 through VL2048.
+    uint32_t expected_hashes[] = {
+        0xb3969968,
+        0x8ba60941,
+        0x53937d52,
+        0xe6737b5d,
+        0x8649cf1f,
+        0xb7ee12ca,
+        0x6fd03bd4,
+        0x4a82eb52,
+        0xc0d52997,
+        0xb52a263f,
+        0x70599fa2,
+        0x68cd2ef1,
+        0x57b84410,
+        0x1072dde9,
+        0xe39a23c8,
+        0xeded9f88,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Exercise SVE2 USRA/URSRA (unsigned [rounding] shift right and accumulate,
+// by immediate) with a fixed pseudo-random instruction stream. Correctness is
+// checked by hashing the whole machine state and comparing against golden
+// values recorded per vector length; the inline "vl128 state" comments give
+// the running hash after each instruction at VL128.
+TEST_SVE(sve2_usra) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Raw encodings via dci(); the stream must stay byte-identical or the
+    // expected hashes below become invalid.
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x450ce41d);  // usra z29.b, z0.b, #4
+    // vl128 state = 0x57e84943
+    __ dci(0x450ce635);  // usra z21.b, z17.b, #4
+    // vl128 state = 0xc2696a7c
+    __ dci(0x45cce637);  // usra z23.d, z17.d, #20
+    // vl128 state = 0x97aec47c
+    __ dci(0x458cee35);  // ursra z21.d, z17.d, #52
+    // vl128 state = 0xab24864c
+    __ dci(0x450eee25);  // ursra z5.b, z17.b, #2
+    // vl128 state = 0x8aab49c9
+    __ dci(0x458eef21);  // ursra z1.d, z25.d, #50
+    // vl128 state = 0x3db09e7f
+    __ dci(0x458fef65);  // ursra z5.d, z27.d, #49
+    // vl128 state = 0xa9905ae3
+    __ dci(0x459fef41);  // ursra z1.d, z26.d, #33
+    // vl128 state = 0x624c2e4d
+    __ dci(0x459fe549);  // usra z9.d, z10.d, #33
+    // vl128 state = 0x5a158f70
+    __ dci(0x459de561);  // usra z1.d, z11.d, #35
+    // vl128 state = 0xf24ffa83
+    __ dci(0x451ce565);  // usra z5.h, z11.h, #4
+    // vl128 state = 0x0213f9c7
+    __ dci(0x4519e564);  // usra z4.h, z11.h, #7
+    // vl128 state = 0x8903ccf3
+    __ dci(0x4589e56c);  // usra z12.d, z11.d, #55
+    // vl128 state = 0x3c0f6e72
+    __ dci(0x4589e56e);  // usra z14.d, z11.d, #55
+    // vl128 state = 0x5d9787fc
+    __ dci(0x4589e56c);  // usra z12.d, z11.d, #55
+    // vl128 state = 0x3bc6fced
+    __ dci(0x458bed64);  // ursra z4.d, z11.d, #53
+    // vl128 state = 0x966476e2
+    __ dci(0x45dbed65);  // ursra z5.d, z11.d, #5
+    // vl128 state = 0xf85c4247
+    __ dci(0x455bedf5);  // ursra z21.s, z15.s, #5
+    // vl128 state = 0xd342f9ae
+    __ dci(0x450bedfd);  // ursra z29.b, z15.b, #5
+    // vl128 state = 0xc03cb476
+    __ dci(0x4549edf9);  // ursra z25.s, z15.s, #23
+    // vl128 state = 0x5649b073
+    __ dci(0x4549ede9);  // ursra z9.s, z15.s, #23
+    // vl128 state = 0xce5a7dbb
+    __ dci(0x4549ed59);  // ursra z25.s, z10.s, #23
+    // vl128 state = 0x8c98ee08
+    __ dci(0x4549ed5d);  // ursra z29.s, z10.s, #23
+    // vl128 state = 0xd991a574
+    __ dci(0x45cded59);  // ursra z25.d, z10.d, #19
+    // vl128 state = 0xebc24746
+    __ dci(0x45d9ed58);  // ursra z24.d, z10.d, #7
+    // vl128 state = 0x145d5970
+    __ dci(0x45d8ec50);  // ursra z16.d, z2.d, #8
+    // vl128 state = 0x8f65850c
+    __ dci(0x45c8ec60);  // ursra z0.d, z3.d, #24
+    // vl128 state = 0xe510a1b4
+    __ dci(0x45c0ed61);  // ursra z1.d, z11.d, #32
+    // vl128 state = 0xfef468e1
+    __ dci(0x45c8ec65);  // ursra z5.d, z3.d, #24
+    // vl128 state = 0xa6754589
+    __ dci(0x45c0e464);  // usra z4.d, z3.d, #32
+    // vl128 state = 0x2b4cd23a
+    __ dci(0x45c0e4a5);  // usra z5.d, z5.d, #32
+    // vl128 state = 0xfa58fea0
+    __ dci(0x45c0e4a1);  // usra z1.d, z5.d, #32
+    // vl128 state = 0x015c4435
+    __ dci(0x45c0e4b1);  // usra z17.d, z5.d, #32
+    // vl128 state = 0x67271050
+    __ dci(0x45c2ecb3);  // ursra z19.d, z5.d, #30
+    // vl128 state = 0x1d3631c3
+    __ dci(0x45c0ece3);  // ursra z3.d, z7.d, #32
+    // vl128 state = 0x646e0e43
+    __ dci(0x45caece7);  // ursra z7.d, z7.d, #22
+    // vl128 state = 0x104bf393
+    __ dci(0x458aeee3);  // ursra z3.d, z23.d, #54
+    // vl128 state = 0xbac8c54b
+    __ dci(0x454aeee1);  // ursra z1.s, z23.s, #22
+    // vl128 state = 0x5c2a40db
+    __ dci(0x4508eee9);  // ursra z9.b, z23.b, #8
+    // vl128 state = 0xe117d81a
+    __ dci(0x4518ece1);  // ursra z1.h, z7.h, #8
+    // vl128 state = 0xeb43265d
+    __ dci(0x451cede0);  // ursra z0.h, z15.h, #4
+    // vl128 state = 0xd5c8d09e
+    __ dci(0x4598edf0);  // ursra z16.d, z15.d, #40
+    // vl128 state = 0x0c060220
+    __ dci(0x451cede0);  // ursra z0.h, z15.h, #4
+    // vl128 state = 0x0ea52d2d
+    __ dci(0x459cefe8);  // ursra z8.d, z31.d, #36
+    // vl128 state = 0xa6a7e977
+    __ dci(0x459ce5f8);  // usra z24.d, z15.d, #36
+    // vl128 state = 0xb0192caf
+    __ dci(0x458cedfa);  // ursra z26.d, z15.d, #52
+    // vl128 state = 0x154fce29
+    __ dci(0x458cedfe);  // ursra z30.d, z15.d, #52
+    // vl128 state = 0x369cc3e1
+    __ dci(0x450cedb6);  // ursra z22.b, z13.b, #4
+    // vl128 state = 0xf613cb4b
+    __ dci(0x450cedb4);  // ursra z20.b, z13.b, #4
+    // vl128 state = 0xd075c8a9
+    __ dci(0x458eeda4);  // ursra z4.d, z13.d, #50
+    // vl128 state = 0xc9366682
+  }
+
+  // Hash the final machine state into `state`, then load the 32-bit result
+  // into w0 so the harness can compare it below.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // Golden hashes, indexed by (SVE vector length in quadwords) - 1, i.e.
+    // VL128 through VL2048.
+    uint32_t expected_hashes[] = {
+        0xc9366682,
+        0xaf202cff,
+        0x0e90a7c4,
+        0xa8c89f40,
+        0xc7bb56ad,
+        0xa203dd34,
+        0xf3b3a749,
+        0xf16c9d5f,
+        0x9929dea8,
+        0xd652c693,
+        0xe76f701b,
+        0xe2fe20a3,
+        0x07182afb,
+        0x816b928f,
+        0x52baf33f,
+        0x9ef46875,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(sve2_ssra) {
+ // Generated ("dci" encoding stream) regression test for SVE2 ssra/srsra
+ // (signed shift-right accumulate) instructions. The raw encodings and the
+ // interleaved "vl128 state" checksums come from the test generator and
+ // must not be edited by hand.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x450ce01d); // ssra z29.b, z0.b, #4
+ // vl128 state = 0xdf461c2b
+ __ dci(0x450ce235); // ssra z21.b, z17.b, #4
+ // vl128 state = 0xd28868a9
+ __ dci(0x45cce237); // ssra z23.d, z17.d, #20
+ // vl128 state = 0x874fc6a9
+ __ dci(0x458cea35); // srsra z21.d, z17.d, #52
+ // vl128 state = 0xb848785b
+ __ dci(0x450eea25); // srsra z5.b, z17.b, #2
+ // vl128 state = 0x8bca62e4
+ __ dci(0x458eeb21); // srsra z1.d, z25.d, #50
+ // vl128 state = 0x3cd1b552
+ __ dci(0x458feb65); // srsra z5.d, z27.d, #49
+ // vl128 state = 0xd78844fb
+ __ dci(0x459feb41); // srsra z1.d, z26.d, #33
+ // vl128 state = 0xa948dc2f
+ __ dci(0x459fe149); // ssra z9.d, z10.d, #33
+ // vl128 state = 0x709a83f1
+ __ dci(0x459de161); // ssra z1.d, z11.d, #35
+ // vl128 state = 0x1c21e4f6
+ __ dci(0x451ce165); // ssra z5.h, z11.h, #4
+ // vl128 state = 0x72288f41
+ __ dci(0x4519e164); // ssra z4.h, z11.h, #7
+ // vl128 state = 0x9a8c4c8c
+ __ dci(0x4589e16c); // ssra z12.d, z11.d, #55
+ // vl128 state = 0x872585d4
+ __ dci(0x4589e16e); // ssra z14.d, z11.d, #55
+ // vl128 state = 0xd237aaa0
+ __ dci(0x4589e16c); // ssra z12.d, z11.d, #55
+ // vl128 state = 0x1c828333
+ __ dci(0x458be964); // srsra z4.d, z11.d, #53
+ // vl128 state = 0xc190178f
+ __ dci(0x45dbe965); // srsra z5.d, z11.d, #5
+ // vl128 state = 0xe9e81bda
+ __ dci(0x455be9f5); // srsra z21.s, z15.s, #5
+ // vl128 state = 0x8e58c7a1
+ __ dci(0x450be9fd); // srsra z29.b, z15.b, #5
+ // vl128 state = 0x904b404b
+ __ dci(0x4549e9f9); // srsra z25.s, z15.s, #23
+ // vl128 state = 0x35a60481
+ __ dci(0x4549e9e9); // srsra z9.s, z15.s, #23
+ // vl128 state = 0x6911448b
+ __ dci(0x4549e959); // srsra z25.s, z10.s, #23
+ // vl128 state = 0xdb384324
+ __ dci(0x4549e95d); // srsra z29.s, z10.s, #23
+ // vl128 state = 0x16acd8ee
+ __ dci(0x45cde959); // srsra z25.d, z10.d, #19
+ // vl128 state = 0x56bf7bda
+ __ dci(0x45d9e958); // srsra z24.d, z10.d, #7
+ // vl128 state = 0x6a713fa6
+ __ dci(0x45d8e850); // srsra z16.d, z2.d, #8
+ // vl128 state = 0xa6394cf3
+ __ dci(0x45c8e860); // srsra z0.d, z3.d, #24
+ // vl128 state = 0x829c3d2a
+ __ dci(0x45c0e961); // srsra z1.d, z11.d, #32
+ // vl128 state = 0x006d1904
+ __ dci(0x45c8e865); // srsra z5.d, z3.d, #24
+ // vl128 state = 0xcc7dffaf
+ __ dci(0x45c0e064); // ssra z4.d, z3.d, #32
+ // vl128 state = 0xc9eaddd0
+ __ dci(0x45c0e0a5); // ssra z5.d, z5.d, #32
+ // vl128 state = 0x643145e1
+ __ dci(0x45c0e0a1); // ssra z1.d, z5.d, #32
+ // vl128 state = 0x03f4c42e
+ __ dci(0x45c0e0b1); // ssra z17.d, z5.d, #32
+ // vl128 state = 0x5a8cff35
+ __ dci(0x45c2e8b3); // srsra z19.d, z5.d, #30
+ // vl128 state = 0x3ee63e9f
+ __ dci(0x45c0e8e3); // srsra z3.d, z7.d, #32
+ // vl128 state = 0x687d943b
+ __ dci(0x45cae8e7); // srsra z7.d, z7.d, #22
+ // vl128 state = 0xf5a19cb2
+ __ dci(0x458aeae3); // srsra z3.d, z23.d, #54
+ // vl128 state = 0xd1371248
+ __ dci(0x454aeae1); // srsra z1.s, z23.s, #22
+ // vl128 state = 0xdb83ef8b
+ __ dci(0x455ae8e9); // srsra z9.s, z7.s, #6
+ // vl128 state = 0xc831a54c
+ __ dci(0x455ee9e8); // srsra z8.s, z15.s, #2
+ // vl128 state = 0x4342b823
+ __ dci(0x45dae9f8); // srsra z24.d, z15.d, #6
+ // vl128 state = 0x52a7151a
+ __ dci(0x455ee9e8); // srsra z8.s, z15.s, #2
+ // vl128 state = 0xde8110e0
+ __ dci(0x45deebe0); // srsra z0.d, z31.d, #2
+ // vl128 state = 0xd2b28e81
+ __ dci(0x45dee1f0); // ssra z16.d, z15.d, #2
+ // vl128 state = 0x56d1c366
+ __ dci(0x45cee9f2); // srsra z18.d, z15.d, #18
+ // vl128 state = 0x53537689
+ __ dci(0x45cee9f6); // srsra z22.d, z15.d, #18
+ // vl128 state = 0x5e410508
+ __ dci(0x454ee9be); // srsra z30.s, z13.s, #18
+ // vl128 state = 0x06245094
+ __ dci(0x454ee9bc); // srsra z28.s, z13.s, #18
+ // vl128 state = 0xb92b3929
+ __ dci(0x45cce9ac); // srsra z12.d, z13.d, #20
+ // vl128 state = 0xfe6a2830
+ __ dci(0x45cde93c); // srsra z28.d, z9.d, #19
+ // vl128 state = 0x737461a1
+ }
+
+ // Hash the resulting machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One precomputed hash per supported vector length, indexed by the
+ // number of 128-bit (Q-register sized) lanes in the SVE vector.
+ uint32_t expected_hashes[] = {
+ 0x737461a1,
+ 0xe1ef707c,
+ 0x9760ba4e,
+ 0x782dd4cd,
+ 0xe793d0c2,
+ 0x991e0de7,
+ 0x34627e21,
+ 0x76c89433,
+ 0x96c9f4ce,
+ 0x38ec4b6f,
+ 0x7aee3ec7,
+ 0x665f9b94,
+ 0x8e166fc3,
+ 0xb4461fac,
+ 0x215de9dc,
+ 0xc23ef1f9,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_sat_arith) {
+ // Generated ("dci" encoding stream) regression test for SVE2 predicated
+ // saturating arithmetic (sqadd/uqadd, sqsub/uqsub and their reversed
+ // forms, suqadd/usqadd). The raw encodings and the interleaved
+ // "vl128 state" checksums come from the test generator; do not edit them.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44df9df9); // uqsubr z25.d, p7/m, z25.d, z15.d
+ // vl128 state = 0x7670ac87
+ __ dci(0x445f9db1); // uqsubr z17.h, p7/m, z17.h, z13.h
+ // vl128 state = 0x3c5b39fe
+ __ dci(0x441f99a1); // uqsubr z1.b, p6/m, z1.b, z13.b
+ // vl128 state = 0x5df43635
+ __ dci(0x441d9ba0); // usqadd z0.b, p6/m, z0.b, z29.b
+ // vl128 state = 0x737bc7a5
+ __ dci(0x441d9ba8); // usqadd z8.b, p6/m, z8.b, z29.b
+ // vl128 state = 0xba69890b
+ __ dci(0x441d9bb8); // usqadd z24.b, p6/m, z24.b, z29.b
+ // vl128 state = 0x3f81c19d
+ __ dci(0x441d8b30); // usqadd z16.b, p2/m, z16.b, z25.b
+ // vl128 state = 0x076c5fc1
+ __ dci(0x441d8a14); // usqadd z20.b, p2/m, z20.b, z16.b
+ // vl128 state = 0x67df29dd
+ __ dci(0x449d8215); // usqadd z21.s, p0/m, z21.s, z16.s
+ // vl128 state = 0x663b236f
+ __ dci(0x449d8205); // usqadd z5.s, p0/m, z5.s, z16.s
+ // vl128 state = 0xe58d41d0
+ __ dci(0x449d8201); // usqadd z1.s, p0/m, z1.s, z16.s
+ // vl128 state = 0x82f89d40
+ __ dci(0x449c8a09); // suqadd z9.s, p2/m, z9.s, z16.s
+ // vl128 state = 0xa0218390
+ __ dci(0x44dd8a0d); // usqadd z13.d, p2/m, z13.d, z16.d
+ // vl128 state = 0xfab22f04
+ __ dci(0x44d98a2c); // uqadd z12.d, p2/m, z12.d, z17.d
+ // vl128 state = 0x70911fc9
+ __ dci(0x44598a0d); // uqadd z13.h, p2/m, z13.h, z16.h
+ // vl128 state = 0xcc12ec49
+ __ dci(0x44d99a05); // uqadd z5.d, p6/m, z5.d, z16.d
+ // vl128 state = 0x31fef46f
+ __ dci(0x44d99004); // uqadd z4.d, p4/m, z4.d, z0.d
+ // vl128 state = 0xf81448db
+ __ dci(0x44d98020); // uqadd z0.d, p0/m, z0.d, z1.d
+ // vl128 state = 0xe6fe9d31
+ __ dci(0x44d980e1); // uqadd z1.d, p0/m, z1.d, z7.d
+ // vl128 state = 0x76fecfc2
+ __ dci(0x44d981c0); // uqadd z0.d, p0/m, z0.d, z14.d
+ // vl128 state = 0x4066a558
+ __ dci(0x44d98161); // uqadd z1.d, p0/m, z1.d, z11.d
+ // vl128 state = 0x0d3a1487
+ __ dci(0x44d98031); // uqadd z17.d, p0/m, z17.d, z1.d
+ // vl128 state = 0x061b4aed
+ __ dci(0x44d98039); // uqadd z25.d, p0/m, z25.d, z1.d
+ // vl128 state = 0x02172a17
+ __ dci(0x44d98029); // uqadd z9.d, p0/m, z9.d, z1.d
+ // vl128 state = 0xebe138b3
+ __ dci(0x44d8800d); // sqadd z13.d, p0/m, z13.d, z0.d
+ // vl128 state = 0x73f0114b
+ __ dci(0x44d8828f); // sqadd z15.d, p0/m, z15.d, z20.d
+ // vl128 state = 0x7a8689e0
+ __ dci(0x44d8829f); // sqadd z31.d, p0/m, z31.d, z20.d
+ // vl128 state = 0x0800ae49
+ __ dci(0x44d88e8f); // sqadd z15.d, p3/m, z15.d, z20.d
+ // vl128 state = 0x9b733fff
+ __ dci(0x44d88e8b); // sqadd z11.d, p3/m, z11.d, z20.d
+ // vl128 state = 0x6d01eb90
+ __ dci(0x44d88e8f); // sqadd z15.d, p3/m, z15.d, z20.d
+ // vl128 state = 0x337692b3
+ __ dci(0x44d8968e); // sqadd z14.d, p5/m, z14.d, z20.d
+ // vl128 state = 0xcd4478b6
+ __ dci(0x44d886ca); // sqadd z10.d, p1/m, z10.d, z22.d
+ // vl128 state = 0x335fd099
+ __ dci(0x44dc87ce); // suqadd z14.d, p1/m, z14.d, z30.d
+ // vl128 state = 0x0d3b6403
+ __ dci(0x44de8fcf); // sqsubr z15.d, p3/m, z15.d, z30.d
+ // vl128 state = 0x41a1073f
+ __ dci(0x449e9fcd); // sqsubr z13.s, p7/m, z13.s, z30.s
+ // vl128 state = 0x5a4b1c22
+ __ dci(0x445e9fcf); // sqsubr z15.h, p7/m, z15.h, z30.h
+ // vl128 state = 0x5a08ccf1
+ __ dci(0x441e9ece); // sqsubr z14.b, p7/m, z14.b, z22.b
+ // vl128 state = 0x3f3c700c
+ __ dci(0x441e8cde); // sqsubr z30.b, p3/m, z30.b, z6.b
+ // vl128 state = 0x3b32b296
+ __ dci(0x441e88fa); // sqsubr z26.b, p2/m, z26.b, z7.b
+ // vl128 state = 0x7a6472e3
+ __ dci(0x441f98f8); // uqsubr z24.b, p6/m, z24.b, z7.b
+ // vl128 state = 0x1d72f5ea
+ __ dci(0x441f98fc); // uqsubr z28.b, p6/m, z28.b, z7.b
+ // vl128 state = 0x0245804b
+ __ dci(0x441b9afe); // uqsub z30.b, p6/m, z30.b, z23.b
+ // vl128 state = 0x8c7ac3d7
+ __ dci(0x441b9afc); // uqsub z28.b, p6/m, z28.b, z23.b
+ // vl128 state = 0xa96d65cb
+ __ dci(0x449b9a74); // uqsub z20.s, p6/m, z20.s, z19.s
+ // vl128 state = 0x261eb58f
+ __ dci(0x449a9b75); // sqsub z21.s, p6/m, z21.s, z27.s
+ // vl128 state = 0x3464e3e5
+ __ dci(0x449a9b7d); // sqsub z29.s, p6/m, z29.s, z27.s
+ // vl128 state = 0xfe3ab427
+ __ dci(0x445a9b79); // sqsub z25.h, p6/m, z25.h, z27.h
+ // vl128 state = 0x609eef3a
+ __ dci(0x445a9b7d); // sqsub z29.h, p6/m, z29.h, z27.h
+ // vl128 state = 0x0e6d6940
+ __ dci(0x445e9b5f); // sqsubr z31.h, p6/m, z31.h, z26.h
+ // vl128 state = 0x60a375e7
+ __ dci(0x441e8b5b); // sqsubr z27.b, p2/m, z27.b, z26.b
+ // vl128 state = 0xea9bd16f
+ }
+
+ // Hash the resulting machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One precomputed hash per supported vector length, indexed by the
+ // number of 128-bit (Q-register sized) lanes in the SVE vector.
+ uint32_t expected_hashes[] = {
+ 0xea9bd16f,
+ 0x1296119e,
+ 0x00aaf6dc,
+ 0xb6ce0579,
+ 0xdb3d0829,
+ 0x119f52d0,
+ 0xf697dcd8,
+ 0x2c46a66c,
+ 0x7d838497,
+ 0x6cd68fb3,
+ 0xf98a5c79,
+ 0x51685054,
+ 0xa9494104,
+ 0x8d012936,
+ 0x32726258,
+ 0x091f1956,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_pair_arith) {
+ // Generated ("dci" encoding stream) regression test for SVE2 predicated
+ // pairwise arithmetic (addp, smaxp/umaxp, sminp/uminp). The raw encodings
+ // and the interleaved "vl128 state" checksums come from the test
+ // generator; do not edit them by hand.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 64 * kInstructionSize);
+ __ dci(0x4414b214); // smaxp z20.b, p4/m, z20.b, z16.b
+ // vl128 state = 0x90adc6c9
+ __ dci(0x4414ba5c); // smaxp z28.b, p6/m, z28.b, z18.b
+ // vl128 state = 0x0e41b2b9
+ __ dci(0x4454ba0c); // smaxp z12.h, p6/m, z12.h, z16.h
+ // vl128 state = 0x472160b8
+ __ dci(0x4454ba64); // smaxp z4.h, p6/m, z4.h, z19.h
+ // vl128 state = 0x4f485ba3
+ __ dci(0x44d4bb65); // smaxp z5.d, p6/m, z5.d, z27.d
+ // vl128 state = 0x432f5185
+ __ dci(0x4456bb64); // sminp z4.h, p6/m, z4.h, z27.h
+ // vl128 state = 0x01bd324a
+ __ dci(0x4455bb74); // umaxp z20.h, p6/m, z20.h, z27.h
+ // vl128 state = 0xaf795389
+ __ dci(0x4451bb35); // addp z21.h, p6/m, z21.h, z25.h
+ // vl128 state = 0x5f4be111
+ __ dci(0x4451ab71); // addp z17.h, p2/m, z17.h, z27.h
+ // vl128 state = 0xc16a8d03
+ __ dci(0x4451ba75); // addp z21.h, p6/m, z21.h, z19.h
+ // vl128 state = 0x8cd36853
+ __ dci(0x4451b225); // addp z5.h, p4/m, z5.h, z17.h
+ // vl128 state = 0xea3d5389
+ __ dci(0x4455b627); // umaxp z7.h, p5/m, z7.h, z17.h
+ // vl128 state = 0xbb42a8e1
+ __ dci(0x4415b426); // umaxp z6.b, p5/m, z6.b, z1.b
+ // vl128 state = 0x485ca761
+ __ dci(0x4415b224); // umaxp z4.b, p4/m, z4.b, z17.b
+ // vl128 state = 0x6bcfd641
+ __ dci(0x4455b02c); // umaxp z12.h, p4/m, z12.h, z1.h
+ // vl128 state = 0x84485a9f
+ __ dci(0x4455a12d); // umaxp z13.h, p0/m, z13.h, z9.h
+ // vl128 state = 0xed43519f
+ __ dci(0x4455b33d); // umaxp z29.h, p4/m, z29.h, z25.h
+ // vl128 state = 0xcc0b7c40
+ __ dci(0x4455b7b9); // umaxp z25.h, p5/m, z25.h, z29.h
+ // vl128 state = 0xe1c14517
+ __ dci(0x4454b6b8); // smaxp z24.h, p5/m, z24.h, z21.h
+ // vl128 state = 0x4c5e9f3c
+ __ dci(0x44d4b4bc); // smaxp z28.d, p5/m, z28.d, z5.d
+ // vl128 state = 0x7530a2f7
+ __ dci(0x44d4b4bd); // smaxp z29.d, p5/m, z29.d, z5.d
+ // vl128 state = 0x37e61b68
+ __ dci(0x44d4b5ed); // smaxp z13.d, p5/m, z13.d, z15.d
+ // vl128 state = 0xb592b6e9
+ __ dci(0x4455b5fd); // umaxp z29.h, p5/m, z29.h, z15.h
+ // vl128 state = 0xe7f9e492
+ __ dci(0x4415b57f); // umaxp z31.b, p5/m, z31.b, z11.b
+ // vl128 state = 0xe4e7b644
+ __ dci(0x4411b5fe); // addp z30.b, p5/m, z30.b, z15.b
+ // vl128 state = 0x4bfe144d
+ __ dci(0x4411a576); // addp z22.b, p1/m, z22.b, z11.b
+ // vl128 state = 0xb1813df8
+ __ dci(0x4455a566); // umaxp z6.h, p1/m, z6.h, z11.h
+ // vl128 state = 0x4aa8b50e
+ __ dci(0x4455adf6); // umaxp z22.h, p3/m, z22.h, z15.h
+ // vl128 state = 0xfc13568a
+ __ dci(0x4454acfe); // smaxp z30.h, p3/m, z30.h, z7.h
+ // vl128 state = 0x3aac7365
+ __ dci(0x4454acff); // smaxp z31.h, p3/m, z31.h, z7.h
+ // vl128 state = 0x610991cf
+ __ dci(0x44d4a8fb); // smaxp z27.d, p2/m, z27.d, z7.d
+ // vl128 state = 0x36581f26
+ __ dci(0x4456a8f3); // sminp z19.h, p2/m, z19.h, z7.h
+ // vl128 state = 0x249bb813
+ __ dci(0x4457a8b1); // uminp z17.h, p2/m, z17.h, z5.h
+ // vl128 state = 0xd48d6d88
+ __ dci(0x4457a8b5); // uminp z21.h, p2/m, z21.h, z5.h
+ // vl128 state = 0x1628fb6e
+ __ dci(0x4456a8f7); // sminp z23.h, p2/m, z23.h, z7.h
+ // vl128 state = 0x0bd3c76b
+ __ dci(0x4456a89f); // sminp z31.h, p2/m, z31.h, z4.h
+ // vl128 state = 0xf09d21e4
+ __ dci(0x4456aa0f); // sminp z15.h, p2/m, z15.h, z16.h
+ // vl128 state = 0xd2a92168
+ __ dci(0x4456b807); // sminp z7.h, p6/m, z7.h, z0.h
+ // vl128 state = 0x009d0ac8
+ __ dci(0x4456bc26); // sminp z6.h, p7/m, z6.h, z1.h
+ // vl128 state = 0x716ddc73
+ __ dci(0x4456beae); // sminp z14.h, p7/m, z14.h, z21.h
+ // vl128 state = 0x35a4d900
+ __ dci(0x4416b6ac); // sminp z12.b, p5/m, z12.b, z21.b
+ // vl128 state = 0x7929e077
+ __ dci(0x4416b6bc); // sminp z28.b, p5/m, z28.b, z21.b
+ // vl128 state = 0x259195ca
+ __ dci(0x4417b694); // uminp z20.b, p5/m, z20.b, z20.b
+ // vl128 state = 0x5cc3927b
+ __ dci(0x4417b684); // uminp z4.b, p5/m, z4.b, z20.b
+ // vl128 state = 0x2e7c4b88
+ __ dci(0x4415b6a0); // umaxp z0.b, p5/m, z0.b, z21.b
+ // vl128 state = 0x1478d524
+ __ dci(0x4415a690); // umaxp z16.b, p1/m, z16.b, z20.b
+ // vl128 state = 0xc3ac4a89
+ __ dci(0x4415b614); // umaxp z20.b, p5/m, z20.b, z16.b
+ // vl128 state = 0xb94a5aeb
+ __ dci(0x4415b675); // umaxp z21.b, p5/m, z21.b, z19.b
+ // vl128 state = 0xabeed92b
+ __ dci(0x4415a63d); // umaxp z29.b, p1/m, z29.b, z17.b
+ // vl128 state = 0xe36835ea
+ __ dci(0x4415a63c); // umaxp z28.b, p1/m, z28.b, z17.b
+ // vl128 state = 0x087002bb
+ __ dci(0x4455a61d); // umaxp z29.h, p1/m, z29.h, z16.h
+ // vl128 state = 0x17388ea4
+ __ dci(0x4451ae1f); // addp z31.h, p3/m, z31.h, z16.h
+ // vl128 state = 0x86ee7dbe
+ __ dci(0x4451ae1b); // addp z27.h, p3/m, z27.h, z16.h
+ // vl128 state = 0x9846169e
+ __ dci(0x4451bc0b); // addp z11.h, p7/m, z11.h, z0.h
+ // vl128 state = 0x5dc31eb0
+ __ dci(0x4455bc4f); // umaxp z15.h, p7/m, z15.h, z2.h
+ // vl128 state = 0x9ec9086c
+ __ dci(0x4455bf47); // umaxp z7.h, p7/m, z7.h, z26.h
+ // vl128 state = 0xf3a2766b
+ __ dci(0x44d5b743); // umaxp z3.d, p5/m, z3.d, z26.d
+ // vl128 state = 0x1ce44f7e
+ __ dci(0x44d5b7e2); // umaxp z2.d, p5/m, z2.d, z31.d
+ // vl128 state = 0xf121f7c0
+ __ dci(0x44d5b7e0); // umaxp z0.d, p5/m, z0.d, z31.d
+ // vl128 state = 0x4ac0d4f3
+ __ dci(0x44d5b670); // umaxp z16.d, p5/m, z16.d, z19.d
+ // vl128 state = 0xdb0d62f5
+ __ dci(0x44d1b272); // addp z18.d, p4/m, z18.d, z19.d
+ // vl128 state = 0x34b0c018
+ __ dci(0x44d1be76); // addp z22.d, p7/m, z22.d, z19.d
+ // vl128 state = 0x1673f380
+ __ dci(0x44d1b772); // addp z18.d, p5/m, z18.d, z27.d
+ // vl128 state = 0xe3e67205
+ __ dci(0x44d1b162); // addp z2.d, p4/m, z2.d, z11.d
+ // vl128 state = 0x42907adc
+ }
+
+ // Hash the resulting machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One precomputed hash per supported vector length, indexed by the
+ // number of 128-bit (Q-register sized) lanes in the SVE vector.
+ uint32_t expected_hashes[] = {
+ 0x42907adc,
+ 0xee2f21f5,
+ 0xcbfa0af4,
+ 0x42e7c862,
+ 0x10ef537f,
+ 0x83461e96,
+ 0x2dca0c37,
+ 0xf2080504,
+ 0xf615d956,
+ 0x1732775a,
+ 0x491fec07,
+ 0xf9e33ada,
+ 0x324435d7,
+ 0x08a9c2ca,
+ 0x87ce3994,
+ 0x338adb5d,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_extract_narrow) {
+ // Generated ("dci" encoding stream) regression test for SVE2 saturating
+ // extract-narrow instructions (sqxtnb/sqxtnt, uqxtnb/uqxtnt,
+ // sqxtunb/sqxtunt). The raw encodings and the interleaved "vl128 state"
+ // checksums come from the test generator; do not edit them by hand.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 64 * kInstructionSize);
+ __ dci(0x45284000); // sqxtnb z0.b, z0.h
+ // vl128 state = 0x874f147b
+ __ dci(0x45284228); // sqxtnb z8.b, z17.h
+ // vl128 state = 0xf694d31e
+ __ dci(0x45284820); // uqxtnb z0.b, z1.h
+ // vl128 state = 0x5d25df42
+ __ dci(0x45304821); // uqxtnb z1.h, z1.s
+ // vl128 state = 0x87eb933f
+ __ dci(0x45304823); // uqxtnb z3.h, z1.s
+ // vl128 state = 0x137eddc9
+ __ dci(0x45604822); // uqxtnb z2.s, z1.d
+ // vl128 state = 0x26e237a3
+ __ dci(0x45604d26); // uqxtnt z6.s, z9.d
+ // vl128 state = 0x72bcf361
+ __ dci(0x45304d2e); // uqxtnt z14.h, z9.s
+ // vl128 state = 0x5bcdd232
+ __ dci(0x45304d3e); // uqxtnt z30.h, z9.s
+ // vl128 state = 0x9a695f7e
+ __ dci(0x453049bc); // uqxtnb z28.h, z13.s
+ // vl128 state = 0x9c2fa230
+ __ dci(0x453049b8); // uqxtnb z24.h, z13.s
+ // vl128 state = 0xb590179f
+ __ dci(0x45304979); // uqxtnb z25.h, z11.s
+ // vl128 state = 0xc8987735
+ __ dci(0x4530497d); // uqxtnb z29.h, z11.s
+ // vl128 state = 0x380f8730
+ __ dci(0x4530496d); // uqxtnb z13.h, z11.s
+ // vl128 state = 0x45bf22d4
+ __ dci(0x45304565); // sqxtnt z5.h, z11.s
+ // vl128 state = 0xd9237f41
+ __ dci(0x45304f75); // uqxtnt z21.h, z27.s
+ // vl128 state = 0x0726a49b
+ __ dci(0x45304f71); // uqxtnt z17.h, z27.s
+ // vl128 state = 0xcbc547e0
+ __ dci(0x45304f73); // uqxtnt z19.h, z27.s
+ // vl128 state = 0x0b16d843
+ __ dci(0x45284f72); // uqxtnt z18.b, z27.h
+ // vl128 state = 0xea84ff1f
+ __ dci(0x45284f7a); // uqxtnt z26.b, z27.h
+ // vl128 state = 0x4bdb094d
+ __ dci(0x45284fca); // uqxtnt z10.b, z30.h
+ // vl128 state = 0x5986f190
+ __ dci(0x45284b8b); // uqxtnb z11.b, z28.h
+ // vl128 state = 0xb40f0b26
+ __ dci(0x45284bef); // uqxtnb z15.b, z31.h
+ // vl128 state = 0x7abef2b5
+ __ dci(0x45284fae); // uqxtnt z14.b, z29.h
+ // vl128 state = 0x79503b36
+ __ dci(0x45284fac); // uqxtnt z12.b, z29.h
+ // vl128 state = 0x481a6879
+ __ dci(0x45284eed); // uqxtnt z13.b, z23.h
+ // vl128 state = 0x32da844c
+ __ dci(0x45284ee9); // uqxtnt z9.b, z23.h
+ // vl128 state = 0xb8438ca7
+ __ dci(0x45284ef9); // uqxtnt z25.b, z23.h
+ // vl128 state = 0x4aa26674
+ __ dci(0x45284cd1); // uqxtnt z17.b, z6.h
+ // vl128 state = 0xc5411d78
+ __ dci(0x45284cd5); // uqxtnt z21.b, z6.h
+ // vl128 state = 0xee446689
+ __ dci(0x45284ad4); // uqxtnb z20.b, z22.h
+ // vl128 state = 0x66ef53ef
+ __ dci(0x45604adc); // uqxtnb z28.s, z22.d
+ // vl128 state = 0xa894f4d4
+ __ dci(0x45604ade); // uqxtnb z30.s, z22.d
+ // vl128 state = 0x50215eb8
+ __ dci(0x456040dc); // sqxtnb z28.s, z6.d
+ // vl128 state = 0x5ee8464d
+ __ dci(0x456048f4); // uqxtnb z20.s, z7.d
+ // vl128 state = 0xee2ca07b
+ __ dci(0x45604c75); // uqxtnt z21.s, z3.d
+ // vl128 state = 0x0e81e7e0
+ __ dci(0x45604cb1); // uqxtnt z17.s, z5.d
+ // vl128 state = 0x5c448cac
+ __ dci(0x45604e33); // uqxtnt z19.s, z17.d
+ // vl128 state = 0xcd0d561e
+ __ dci(0x45604e23); // uqxtnt z3.s, z17.d
+ // vl128 state = 0x7b8b2204
+ __ dci(0x45604cab); // uqxtnt z11.s, z5.d
+ // vl128 state = 0x418cec7f
+ __ dci(0x45604caa); // uqxtnt z10.s, z5.d
+ // vl128 state = 0x37064bb6
+ __ dci(0x45604efa); // uqxtnt z26.s, z23.d
+ // vl128 state = 0xc83ef05d
+ __ dci(0x456046db); // sqxtnt z27.s, z22.d
+ // vl128 state = 0xe30a1f0f
+ __ dci(0x456046da); // sqxtnt z26.s, z22.d
+ // vl128 state = 0xe10b92fa
+ __ dci(0x4560424a); // sqxtnb z10.s, z18.d
+ // vl128 state = 0x2396410c
+ __ dci(0x45604a08); // uqxtnb z8.s, z16.d
+ // vl128 state = 0xf4ae5ad5
+ __ dci(0x45304a00); // uqxtnb z0.h, z16.s
+ // vl128 state = 0x26bbb3d1
+ __ dci(0x45304828); // uqxtnb z8.h, z1.s
+ // vl128 state = 0x57d91166
+ __ dci(0x4530422c); // sqxtnb z12.h, z17.s
+ // vl128 state = 0x5548e0b4
+ __ dci(0x45305324); // sqxtunb z4.h, z25.s
+ // vl128 state = 0xf7eb8d9c
+ __ dci(0x45305325); // sqxtunb z5.h, z25.s
+ // vl128 state = 0xcf294303
+ __ dci(0x45305321); // sqxtunb z1.h, z25.s
+ // vl128 state = 0x6c7597d6
+ __ dci(0x453057a9); // sqxtunt z9.h, z29.s
+ // vl128 state = 0xe7be4fd5
+ __ dci(0x453043b9); // sqxtnb z25.h, z29.s
+ // vl128 state = 0x376f3f76
+ __ dci(0x453043bb); // sqxtnb z27.h, z29.s
+ // vl128 state = 0xf8389159
+ __ dci(0x4530431a); // sqxtnb z26.h, z24.s
+ // vl128 state = 0x8ca15413
+ __ dci(0x45304312); // sqxtnb z18.h, z24.s
+ // vl128 state = 0x2a6d8b90
+ __ dci(0x4530491a); // uqxtnb z26.h, z8.s
+ // vl128 state = 0x7119ff0d
+ __ dci(0x4530413b); // sqxtnb z27.h, z9.s
+ // vl128 state = 0x884748db
+ __ dci(0x4530482b); // uqxtnb z11.h, z1.s
+ // vl128 state = 0x43296aec
+ __ dci(0x4530483b); // uqxtnb z27.h, z1.s
+ // vl128 state = 0xdb9908f0
+ __ dci(0x45304979); // uqxtnb z25.h, z11.s
+ // vl128 state = 0xef30bfc8
+ __ dci(0x453049d1); // uqxtnb z17.h, z14.s
+ // vl128 state = 0xb46173d8
+ __ dci(0x456049d3); // uqxtnb z19.s, z14.d
+ // vl128 state = 0xcb8c3b83
+ }
+
+ // Hash the resulting machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One precomputed hash per supported vector length, indexed by the
+ // number of 128-bit (Q-register sized) lanes in the SVE vector.
+ uint32_t expected_hashes[] = {
+ 0xcb8c3b83,
+ 0x92fb7f98,
+ 0xb7ec6385,
+ 0x81de8602,
+ 0xd970d431,
+ 0x2fe61431,
+ 0x359b1355,
+ 0xdeec900e,
+ 0xfd0c7d7d,
+ 0x62e89b19,
+ 0x43039424,
+ 0xdd42efc9,
+ 0x861010f1,
+ 0x82d68f37,
+ 0x3761a1d0,
+ 0xbcf3c5c9,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_eorbt_eortb) {
+ // Generated ("dci" encoding stream) regression test for SVE2 interleaved
+ // XOR instructions (eorbt/eortb). The raw encodings and the interleaved
+ // "vl128 state" checksums come from the test generator; do not edit them
+ // by hand.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x451892b8); // eorbt z24.b, z21.b, z24.b
+ // vl128 state = 0xc3f2b082
+ __ dci(0x455893ba); // eorbt z26.h, z29.h, z24.h
+ // vl128 state = 0xc7421198
+ __ dci(0x455892f8); // eorbt z24.h, z23.h, z24.h
+ // vl128 state = 0x4e155b96
+ __ dci(0x455092bc); // eorbt z28.h, z21.h, z16.h
+ // vl128 state = 0x09393ad0
+ __ dci(0x455893be); // eorbt z30.h, z29.h, z24.h
+ // vl128 state = 0x6d660844
+ __ dci(0x4558922e); // eorbt z14.h, z17.h, z24.h
+ // vl128 state = 0x84f1ff20
+ __ dci(0x45d892aa); // eorbt z10.d, z21.d, z24.d
+ // vl128 state = 0x568612d4
+ __ dci(0x454892a8); // eorbt z8.h, z21.h, z8.h
+ // vl128 state = 0x699a3e24
+ __ dci(0x45c890ac); // eorbt z12.d, z5.d, z8.d
+ // vl128 state = 0x17bb6d9b
+ __ dci(0x45c990ed); // eorbt z13.d, z7.d, z9.d
+ // vl128 state = 0xee5be73f
+ __ dci(0x45c892fd); // eorbt z29.d, z23.d, z8.d
+ // vl128 state = 0x141c47ed
+ __ dci(0x45c892f9); // eorbt z25.d, z23.d, z8.d
+ // vl128 state = 0xc3259593
+ __ dci(0x45c892f8); // eorbt z24.d, z23.d, z8.d
+ // vl128 state = 0x3bca0bcc
+ __ dci(0x45c892e8); // eorbt z8.d, z23.d, z8.d
+ // vl128 state = 0x4714ab64
+ __ dci(0x454a92ea); // eorbt z10.h, z23.h, z10.h
+ // vl128 state = 0x51360c73
+ __ dci(0x454092e2); // eorbt z2.h, z23.h, z0.h
+ // vl128 state = 0xe33859fe
+ __ dci(0x454092f2); // eorbt z18.h, z23.h, z0.h
+ // vl128 state = 0xa0d81168
+ __ dci(0x4550927a); // eorbt z26.h, z19.h, z16.h
+ // vl128 state = 0xe4983274
+ __ dci(0x4551923b); // eorbt z27.h, z17.h, z17.h
+ // vl128 state = 0x8e89eab7
+ __ dci(0x45d3923f); // eorbt z31.d, z17.d, z19.d
+ // vl128 state = 0x472bd288
+ __ dci(0x4553921d); // eorbt z29.h, z16.h, z19.h
+ // vl128 state = 0x61090ed4
+ __ dci(0x4553932d); // eorbt z13.h, z25.h, z19.h
+ // vl128 state = 0x3ef228eb
+ __ dci(0x4513912c); // eorbt z12.b, z9.b, z19.b
+ // vl128 state = 0x96d4505c
+ __ dci(0x4551912d); // eorbt z13.h, z9.h, z17.h
+ // vl128 state = 0x1c32baef
+ __ dci(0x45119029); // eorbt z9.b, z1.b, z17.b
+ // vl128 state = 0xa138f554
+ __ dci(0x45149028); // eorbt z8.b, z1.b, z20.b
+ // vl128 state = 0xf0681d9a
+ __ dci(0x459490aa); // eorbt z10.s, z5.s, z20.s
+ // vl128 state = 0xbd4b30f5
+ __ dci(0x458590a8); // eorbt z8.s, z5.s, z5.s
+ // vl128 state = 0x45c5b437
+ __ dci(0x4585948c); // eortb z12.s, z4.s, z5.s
+ // vl128 state = 0x22f90a7b
+ __ dci(0x45cd949c); // eortb z28.d, z4.d, z13.d
+ // vl128 state = 0x5e4584ca
+ __ dci(0x4589949d); // eortb z29.s, z4.s, z9.s
+ // vl128 state = 0x65ac913e
+ __ dci(0x458990ad); // eorbt z13.s, z5.s, z9.s
+ // vl128 state = 0x4f13d973
+ __ dci(0x459b90ac); // eorbt z12.s, z5.s, z27.s
+ // vl128 state = 0xd13bb801
+ __ dci(0x45db90ee); // eorbt z14.d, z7.d, z27.d
+ // vl128 state = 0xf24115d0
+ __ dci(0x45db916f); // eorbt z15.d, z11.d, z27.d
+ // vl128 state = 0x04f38375
+ __ dci(0x45db95e7); // eortb z7.d, z15.d, z27.d
+ // vl128 state = 0xe1046ae5
+ __ dci(0x45db94a3); // eortb z3.d, z5.d, z27.d
+ // vl128 state = 0xaaeae67e
+ __ dci(0x45dd94a1); // eortb z1.d, z5.d, z29.d
+ // vl128 state = 0xd67f6823
+ __ dci(0x45dd94b1); // eortb z17.d, z5.d, z29.d
+ // vl128 state = 0xf172245b
+ __ dci(0x45dd90f3); // eorbt z19.d, z7.d, z29.d
+ // vl128 state = 0xc99195b8
+ __ dci(0x458d90e3); // eorbt z3.s, z7.s, z13.s
+ // vl128 state = 0xe1a146cf
+ __ dci(0x458994e2); // eortb z2.s, z7.s, z9.s
+ // vl128 state = 0x8038f273
+ __ dci(0x458b94a3); // eortb z3.s, z5.s, z11.s
+ // vl128 state = 0x50bda372
+ __ dci(0x459b9481); // eortb z1.s, z4.s, z27.s
+ // vl128 state = 0xe8d53012
+ __ dci(0x455b9485); // eortb z5.h, z4.h, z27.h
+ // vl128 state = 0xdba33ea5
+ __ dci(0x454b9087); // eorbt z7.h, z4.h, z11.h
+ // vl128 state = 0xff7f1815
+ __ dci(0x45499003); // eorbt z3.h, z0.h, z9.h
+ // vl128 state = 0x5d6e0104
+ __ dci(0x454d9022); // eorbt z2.h, z1.h, z13.h
+ // vl128 state = 0xe9161cfe
+ __ dci(0x45099026); // eorbt z6.b, z1.b, z9.b
+ // vl128 state = 0x48126fb9
+ __ dci(0x454b9024); // eorbt z4.h, z1.h, z11.h
+ // vl128 state = 0x53cbfc46
+ }
+
+ // Hash the resulting machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One precomputed hash per supported vector length, indexed by the
+ // number of 128-bit (Q-register sized) lanes in the SVE vector.
+ uint32_t expected_hashes[] = {
+ 0x53cbfc46,
+ 0x0f81a01e,
+ 0xf97c4e96,
+ 0x745e9ed6,
+ 0x4487a0a1,
+ 0x7ad79509,
+ 0x53577280,
+ 0x1e589717,
+ 0xaaa96af0,
+ 0x4f2b0884,
+ 0x24d2cd1c,
+ 0x4d89438d,
+ 0x9b327a12,
+ 0xeabfd558,
+ 0xb63e33f1,
+ 0xebd7d9ca,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_saturating_multiply_add_high_vector) {
+ // Generated ("dci" encoding stream) regression test for SVE2 saturating
+ // rounding doubling multiply-add/subtract high (sqrdmlah/sqrdmlsh,
+ // vector form). The raw encodings and the interleaved "vl128 state"
+ // checksums come from the test generator; do not edit them by hand.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
+ __ dci(0x44d9721a); // sqrdmlah z26.d, z16.d, z25.d
+ // vl128 state = 0xc0474f3f
+ __ dci(0x44dd761b); // sqrdmlsh z27.d, z16.d, z29.d
+ // vl128 state = 0x102712ac
+ __ dci(0x44d4760b); // sqrdmlsh z11.d, z16.d, z20.d
+ // vl128 state = 0xe8666aa6
+ __ dci(0x44947709); // sqrdmlsh z9.s, z24.s, z20.s
+ // vl128 state = 0xdd18f643
+ __ dci(0x4494770b); // sqrdmlsh z11.s, z24.s, z20.s
+ // vl128 state = 0xac4a4d4c
+ __ dci(0x44d4773b); // sqrdmlsh z27.d, z25.d, z20.d
+ // vl128 state = 0x1a5447d4
+ __ dci(0x44dc7639); // sqrdmlsh z25.d, z17.d, z28.d
+ // vl128 state = 0xf547ac30
+ __ dci(0x44dc763b); // sqrdmlsh z27.d, z17.d, z28.d
+ // vl128 state = 0xb42d177a
+ __ dci(0x44d4743f); // sqrdmlsh z31.d, z1.d, z20.d
+ // vl128 state = 0xd0da2c6b
+ __ dci(0x449c742f); // sqrdmlsh z15.s, z1.s, z28.s
+ // vl128 state = 0xb24c8988
+ __ dci(0x449c7487); // sqrdmlsh z7.s, z4.s, z28.s
+ // vl128 state = 0x9e67ddac
+ __ dci(0x449c7485); // sqrdmlsh z5.s, z4.s, z28.s
+ // vl128 state = 0xd96b34e2
+ __ dci(0x448e7481); // sqrdmlsh z1.s, z4.s, z14.s
+ // vl128 state = 0x81d91007
+ __ dci(0x448e7480); // sqrdmlsh z0.s, z4.s, z14.s
+ // vl128 state = 0x901fa692
+ __ dci(0x449c7488); // sqrdmlsh z8.s, z4.s, z28.s
+ // vl128 state = 0xeedceee6
+ __ dci(0x441c758a); // sqrdmlsh z10.b, z12.b, z28.b
+ // vl128 state = 0x8dc4d389
+ __ dci(0x441475ae); // sqrdmlsh z14.b, z13.b, z20.b
+ // vl128 state = 0xb1711932
+ __ dci(0x440075ac); // sqrdmlsh z12.b, z13.b, z0.b
+ // vl128 state = 0x8cacf188
+ __ dci(0x440171bc); // sqrdmlah z28.b, z13.b, z1.b
+ // vl128 state = 0x9c8b9f4f
+ __ dci(0x440171b8); // sqrdmlah z24.b, z13.b, z1.b
+ // vl128 state = 0x562ebefa
+ __ dci(0x441971b9); // sqrdmlah z25.b, z13.b, z25.b
+ // vl128 state = 0x1ef60d31
+ __ dci(0x440970bb); // sqrdmlah z27.b, z5.b, z9.b
+ // vl128 state = 0x69bd18ee
+ __ dci(0x441870ba); // sqrdmlah z26.b, z5.b, z24.b
+ // vl128 state = 0x525b1f84
+ __ dci(0x441270b8); // sqrdmlah z24.b, z5.b, z18.b
+ // vl128 state = 0x3c7dadd8
+ __ dci(0x44927090); // sqrdmlah z16.s, z4.s, z18.s
+ // vl128 state = 0x276f0567
+ __ dci(0x44937292); // sqrdmlah z18.s, z20.s, z19.s
+ // vl128 state = 0x6f0f8bb4
+ __ dci(0x4491721a); // sqrdmlah z26.s, z16.s, z17.s
+ // vl128 state = 0x28eb737a
+ __ dci(0x44d3721b); // sqrdmlah z27.d, z16.d, z19.d
+ // vl128 state = 0xa3bd1133
+ __ dci(0x44d372ab); // sqrdmlah z11.d, z21.d, z19.d
+ // vl128 state = 0x6e81e8fd
+ __ dci(0x44d372a3); // sqrdmlah z3.d, z21.d, z19.d
+ // vl128 state = 0x55730750
+ __ dci(0x445376a1); // sqrdmlsh z1.h, z21.h, z19.h
+ // vl128 state = 0x7c7afd6d
+ __ dci(0x44527685); // sqrdmlsh z5.h, z20.h, z18.h
+ // vl128 state = 0x1c9dc1a1
+ __ dci(0x44127495); // sqrdmlsh z21.b, z4.b, z18.b
+ // vl128 state = 0xf2e07e92
+ __ dci(0x44127794); // sqrdmlsh z20.b, z28.b, z18.b
+ // vl128 state = 0xc5a2e589
+ __ dci(0x44527695); // sqrdmlsh z21.h, z20.h, z18.h
+ // vl128 state = 0x417df395
+ __ dci(0x445274dd); // sqrdmlsh z29.h, z6.h, z18.h
+ // vl128 state = 0x2e223308
+ __ dci(0x445774df); // sqrdmlsh z31.h, z6.h, z23.h
+ // vl128 state = 0x99047839
+ __ dci(0x445775fe); // sqrdmlsh z30.h, z15.h, z23.h
+ // vl128 state = 0x34a4be39
+ __ dci(0x445175ff); // sqrdmlsh z31.h, z15.h, z17.h
+ // vl128 state = 0x714b9d66
+ __ dci(0x44517557); // sqrdmlsh z23.h, z10.h, z17.h
+ // vl128 state = 0x2aa51ff4
+ }
+
+ // Hash the resulting machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One precomputed hash per supported vector length, indexed by the
+ // number of 128-bit (Q-register sized) lanes in the SVE vector.
+ uint32_t expected_hashes[] = {
+ 0x2aa51ff4,
+ 0xde163ba0,
+ 0x8b237661,
+ 0x30086cf2,
+ 0xabf248f0,
+ 0xcc183608,
+ 0xa4103141,
+ 0x521ebe39,
+ 0xd746470e,
+ 0x141a51a4,
+ 0x695a47fd,
+ 0x0a74d701,
+ 0xd14bae63,
+ 0xf967aadb,
+ 0xdaed8896,
+ 0x7ba556cb,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_integer_pairwise_add_accumulate_long) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw SADALP/UADALP (signed/
+ // unsigned pairwise add-and-accumulate long) encodings via dci(). The
+ // "vl128 state" comments record the expected machine-state hash after
+ // each instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
+ __ dci(0x4445b4e3); // uadalp z3.h, p5/m, z7.b
+ // vl128 state = 0x3ad015af
+ __ dci(0x4445b4e1); // uadalp z1.h, p5/m, z7.b
+ // vl128 state = 0x3f53978b
+ __ dci(0x4445bc65); // uadalp z5.h, p7/m, z3.b
+ // vl128 state = 0xf3340744
+ __ dci(0x4445be35); // uadalp z21.h, p7/m, z17.b
+ // vl128 state = 0xb6f81377
+ __ dci(0x4445be9d); // uadalp z29.h, p7/m, z20.b
+ // vl128 state = 0xaf772b37
+ __ dci(0x4444bc9c); // sadalp z28.h, p7/m, z4.b
+ // vl128 state = 0x591be304
+ __ dci(0x4444bc9d); // sadalp z29.h, p7/m, z4.b
+ // vl128 state = 0x406d9d34
+ __ dci(0x4444ba99); // sadalp z25.h, p6/m, z20.b
+ // vl128 state = 0xb455880f
+ __ dci(0x44c4ba09); // sadalp z9.d, p6/m, z16.s
+ // vl128 state = 0x5ef8e2ed
+ __ dci(0x44c4ba01); // sadalp z1.d, p6/m, z16.s
+ // vl128 state = 0xca2ccf0d
+ __ dci(0x44c4ba11); // sadalp z17.d, p6/m, z16.s
+ // vl128 state = 0x33bb9903
+ __ dci(0x4484bb15); // sadalp z21.s, p6/m, z24.h
+ // vl128 state = 0x3964a356
+ __ dci(0x4484b957); // sadalp z23.s, p6/m, z10.h
+ // vl128 state = 0x1e1426d2
+ __ dci(0x4484b953); // sadalp z19.s, p6/m, z10.h
+ // vl128 state = 0x83e2e1a6
+ __ dci(0x4484b943); // sadalp z3.s, p6/m, z10.h
+ // vl128 state = 0x24335149
+ __ dci(0x4484b102); // sadalp z2.s, p4/m, z8.h
+ // vl128 state = 0x8bde109a
+ __ dci(0x4484bd06); // sadalp z6.s, p7/m, z8.h
+ // vl128 state = 0x5abf30eb
+ __ dci(0x4484bdc2); // sadalp z2.s, p7/m, z14.h
+ // vl128 state = 0xcb199381
+ __ dci(0x4485b5c6); // uadalp z6.s, p5/m, z14.h
+ // vl128 state = 0x5f3819ad
+ __ dci(0x4485b5c2); // uadalp z2.s, p5/m, z14.h
+ // vl128 state = 0x5f6d69e4
+ __ dci(0x4485b5ca); // uadalp z10.s, p5/m, z14.h
+ // vl128 state = 0x1a0d7053
+ __ dci(0x4485b15a); // uadalp z26.s, p4/m, z10.h
+ // vl128 state = 0x9081b6cd
+ __ dci(0x44c5b95e); // uadalp z30.d, p6/m, z10.s
+ // vl128 state = 0x6b15107e
+ __ dci(0x44c5a14e); // uadalp z14.d, p0/m, z10.s
+ // vl128 state = 0x4a127dc2
+ __ dci(0x4445a1c6); // uadalp z6.h, p0/m, z14.b
+ // vl128 state = 0x06902399
+ __ dci(0x4445a1ce); // uadalp z14.h, p0/m, z14.b
+ // vl128 state = 0x1789be4a
+ __ dci(0x4444a9de); // sadalp z30.h, p2/m, z14.b
+ // vl128 state = 0x86732543
+ __ dci(0x4444adff); // sadalp z31.h, p3/m, z15.b
+ // vl128 state = 0xe326faef
+ __ dci(0x4444bdb7); // sadalp z23.h, p7/m, z13.b
+ // vl128 state = 0x46d5f328
+ __ dci(0x4444bda7); // sadalp z7.h, p7/m, z13.b
+ // vl128 state = 0x5cf7a973
+ __ dci(0x4445bd25); // uadalp z5.h, p7/m, z9.b
+ // vl128 state = 0xdf8cbb97
+ __ dci(0x4485bd35); // uadalp z21.s, p7/m, z9.h
+ // vl128 state = 0x330c3d35
+ __ dci(0x4485bc17); // uadalp z23.s, p7/m, z0.h
+ // vl128 state = 0x6ebfa4fe
+ __ dci(0x4485bc15); // uadalp z21.s, p7/m, z0.h
+ // vl128 state = 0x52f18385
+ __ dci(0x4485be91); // uadalp z17.s, p7/m, z20.h
+ // vl128 state = 0x82fa2d85
+ __ dci(0x4485be53); // uadalp z19.s, p7/m, z18.h
+ // vl128 state = 0xa7d6098b
+ __ dci(0x4485aa52); // uadalp z18.s, p2/m, z18.h
+ // vl128 state = 0xfe8faafa
+ __ dci(0x4485ae13); // uadalp z19.s, p3/m, z16.h
+ // vl128 state = 0xf2465f31
+ __ dci(0x4485b617); // uadalp z23.s, p5/m, z16.h
+ // vl128 state = 0xed6be8ed
+ __ dci(0x4485bc13); // uadalp z19.s, p7/m, z0.h
+ // vl128 state = 0xb2f95c3d
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0xb2f95c3d,
+ 0xa4189170,
+ 0xed9e7f9e,
+ 0xfca732cb,
+ 0x4c94b2d7,
+ 0x92a2fb21,
+ 0xbca62a5c,
+ 0x9aec54d6,
+ 0x8df82b02,
+ 0x50c18764,
+ 0xd27e5a0e,
+ 0x1a538cc6,
+ 0x538b673e,
+ 0x37e4b499,
+ 0x7160cbd5,
+ 0x113951bc,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_pmul_mul_vector_unpredicated) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw MUL/PMUL (unpredicated
+ // vector multiply / polynomial multiply) encodings via dci(). The
+ // "vl128 state" comments record the expected machine-state hash after
+ // each instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x04a56309); // mul z9.s, z24.s, z5.s
+ // vl128 state = 0x0ef461d5
+ __ dci(0x04a56148); // mul z8.s, z10.s, z5.s
+ // vl128 state = 0xce9f1381
+ __ dci(0x04a161d8); // mul z24.s, z14.s, z1.s
+ // vl128 state = 0x2a14ff8c
+ __ dci(0x04a16179); // mul z25.s, z11.s, z1.s
+ // vl128 state = 0x88a0241b
+ __ dci(0x04b36171); // mul z17.s, z11.s, z19.s
+ // vl128 state = 0x23aea8a6
+ __ dci(0x04fb6170); // mul z16.d, z11.d, z27.d
+ // vl128 state = 0x58eaa46d
+ __ dci(0x04fb6171); // mul z17.d, z11.d, z27.d
+ // vl128 state = 0xc733a399
+ __ dci(0x04fb6350); // mul z16.d, z26.d, z27.d
+ // vl128 state = 0x2806af41
+ __ dci(0x04eb6372); // mul z18.d, z27.d, z11.d
+ // vl128 state = 0x5ec775d1
+ __ dci(0x04eb6376); // mul z22.d, z27.d, z11.d
+ // vl128 state = 0x40d03f0d
+ __ dci(0x04ed637e); // mul z30.d, z27.d, z13.d
+ // vl128 state = 0xe3a61d56
+ __ dci(0x04e8637f); // mul z31.d, z27.d, z8.d
+ // vl128 state = 0x2eb4313f
+ __ dci(0x04a86337); // mul z23.s, z25.s, z8.s
+ // vl128 state = 0xc68e329e
+ __ dci(0x04a86336); // mul z22.s, z25.s, z8.s
+ // vl128 state = 0x177b1a43
+ __ dci(0x04ac63be); // mul z30.s, z29.s, z12.s
+ // vl128 state = 0xaaa415dd
+ __ dci(0x04ac63d6); // mul z22.s, z30.s, z12.s
+ // vl128 state = 0xaeb212b8
+ __ dci(0x042c67d2); // pmul z18.b, z30.b, z12.b
+ // vl128 state = 0xa11be1c8
+ __ dci(0x042c65f3); // pmul z19.b, z15.b, z12.b
+ // vl128 state = 0x8dd03a21
+ __ dci(0x042e65d2); // pmul z18.b, z14.b, z14.b
+ // vl128 state = 0x83ef9a66
+ __ dci(0x042f6550); // pmul z16.b, z10.b, z15.b
+ // vl128 state = 0x6a495368
+ __ dci(0x042e6754); // pmul z20.b, z26.b, z14.b
+ // vl128 state = 0x0b6c3ccf
+ __ dci(0x042e6750); // pmul z16.b, z26.b, z14.b
+ // vl128 state = 0xa745457f
+ __ dci(0x042e6600); // pmul z0.b, z16.b, z14.b
+ // vl128 state = 0x92fe8b9d
+ __ dci(0x042e6602); // pmul z2.b, z16.b, z14.b
+ // vl128 state = 0xda39ebe2
+ __ dci(0x043f6600); // pmul z0.b, z16.b, z31.b
+ // vl128 state = 0xcc36d223
+ __ dci(0x042b6608); // pmul z8.b, z16.b, z11.b
+ // vl128 state = 0x8b94d25a
+ __ dci(0x042a6700); // pmul z0.b, z24.b, z10.b
+ // vl128 state = 0x0118ccba
+ __ dci(0x042a6710); // pmul z16.b, z24.b, z10.b
+ // vl128 state = 0x4b38543b
+ __ dci(0x042a6714); // pmul z20.b, z24.b, z10.b
+ // vl128 state = 0xa54e126f
+ __ dci(0x042a6716); // pmul z22.b, z24.b, z10.b
+ // vl128 state = 0x61ad87c9
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0x61ad87c9,
+ 0x82df488f,
+ 0xc0d7c1a4,
+ 0x4f86e761,
+ 0x8d651d7b,
+ 0x294cf55a,
+ 0x060ab34c,
+ 0x1db0e99c,
+ 0x4b0b59d7,
+ 0xcee6dfd1,
+ 0x29575669,
+ 0x5c1c7922,
+ 0x4b1957ed,
+ 0x8bc5712b,
+ 0x6ac59fdc,
+ 0x048ce1b5,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_smulh_umulh_vector_unpredicated) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw SMULH/UMULH (unpredicated
+ // signed/unsigned multiply-returning-high-half) encodings via dci().
+ // The "vl128 state" comments record the expected machine-state hash
+ // after each instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x04e46c3b); // umulh z27.d, z1.d, z4.d
+ // vl128 state = 0xfb66ba83
+ __ dci(0x04ac6c3a); // umulh z26.s, z1.s, z12.s
+ // vl128 state = 0x45cdb9a2
+ __ dci(0x04a86e32); // umulh z18.s, z17.s, z8.s
+ // vl128 state = 0x4ad150dc
+ __ dci(0x04a86a7a); // smulh z26.s, z19.s, z8.s
+ // vl128 state = 0xbf08e2cb
+ __ dci(0x04e86b7b); // smulh z27.d, z27.d, z8.d
+ // vl128 state = 0x51ad0655
+ __ dci(0x04ee6b73); // smulh z19.d, z27.d, z14.d
+ // vl128 state = 0xf764bda9
+ __ dci(0x04ec6f7b); // umulh z27.d, z27.d, z12.d
+ // vl128 state = 0xc90f20ef
+ __ dci(0x04ac6f3a); // umulh z26.s, z25.s, z12.s
+ // vl128 state = 0x9ec08333
+ __ dci(0x04ac6f32); // umulh z18.s, z25.s, z12.s
+ // vl128 state = 0x3620406c
+ __ dci(0x042e6f3a); // umulh z26.b, z25.b, z14.b
+ // vl128 state = 0x4e18467a
+ __ dci(0x042a6b2a); // smulh z10.b, z25.b, z10.b
+ // vl128 state = 0x13c7cd6f
+ __ dci(0x042a6b2b); // smulh z11.b, z25.b, z10.b
+ // vl128 state = 0x16a44c1b
+ __ dci(0x043a6b03); // smulh z3.b, z24.b, z26.b
+ // vl128 state = 0x9f8f203b
+ __ dci(0x047a690b); // smulh z11.h, z8.h, z26.h
+ // vl128 state = 0xce0aa45e
+ __ dci(0x047a690a); // smulh z10.h, z8.h, z26.h
+ // vl128 state = 0xb667d59b
+ __ dci(0x0479690e); // smulh z14.h, z8.h, z25.h
+ // vl128 state = 0xd76639b7
+ __ dci(0x046d690c); // smulh z12.h, z8.h, z13.h
+ // vl128 state = 0x736b227e
+ __ dci(0x042f690e); // smulh z14.b, z8.b, z15.b
+ // vl128 state = 0xc0804df9
+ __ dci(0x042f69ac); // smulh z12.b, z13.b, z15.b
+ // vl128 state = 0x8a5509f5
+ __ dci(0x042f696e); // smulh z14.b, z11.b, z15.b
+ // vl128 state = 0x761f9cf8
+ __ dci(0x042e6b6a); // smulh z10.b, z27.b, z14.b
+ // vl128 state = 0x3b5f2705
+ __ dci(0x042e6b6e); // smulh z14.b, z27.b, z14.b
+ // vl128 state = 0x53b23a0a
+ __ dci(0x04366b6f); // smulh z15.b, z27.b, z22.b
+ // vl128 state = 0x5bd53ce9
+ __ dci(0x04766f7f); // umulh z31.h, z27.h, z22.h
+ // vl128 state = 0x701bec8f
+ __ dci(0x04746fef); // umulh z15.h, z31.h, z20.h
+ // vl128 state = 0x29697c8c
+ __ dci(0x04706dee); // umulh z14.h, z15.h, z16.h
+ // vl128 state = 0x2088f1c2
+ __ dci(0x04706c7e); // umulh z30.h, z3.h, z16.h
+ // vl128 state = 0x56224145
+ __ dci(0x04306c2e); // umulh z14.b, z1.b, z16.b
+ // vl128 state = 0x2ba58c9c
+ __ dci(0x04b06e2a); // umulh z10.s, z17.s, z16.s
+ // vl128 state = 0xb933d058
+ __ dci(0x04b56e2e); // umulh z14.s, z17.s, z21.s
+ // vl128 state = 0x184daee9
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0x184daee9,
+ 0x19454232,
+ 0xa56823a3,
+ 0xe334897a,
+ 0xcaa988e1,
+ 0x614cbf4f,
+ 0xfaa384e4,
+ 0x4b45e885,
+ 0xef930ead,
+ 0x49304b9a,
+ 0x4f1d830e,
+ 0xa41c1a95,
+ 0xa1ea8d07,
+ 0x62ca97b4,
+ 0x15f52cac,
+ 0xc190cd57,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_arith_interleaved_long) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw interleaved-long
+ // add/subtract encodings (SADDLBT, SSUBLBT, SSUBLTB) via dci(). The
+ // "vl128 state" comments record the expected machine-state hash after
+ // each instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x459289bd); // ssublbt z29.s, z13.h, z18.h
+ // vl128 state = 0xe2e0965a
+ __ dci(0x459289bf); // ssublbt z31.s, z13.h, z18.h
+ // vl128 state = 0x64e3e1a3
+ __ dci(0x45d689be); // ssublbt z30.d, z13.s, z22.s
+ // vl128 state = 0x02711ec2
+ __ dci(0x45d68916); // ssublbt z22.d, z8.s, z22.s
+ // vl128 state = 0x7ff6f63f
+ __ dci(0x45968957); // ssublbt z23.s, z10.h, z22.h
+ // vl128 state = 0xa9aace7f
+ __ dci(0x45968a55); // ssublbt z21.s, z18.h, z22.h
+ // vl128 state = 0x6007d46c
+ __ dci(0x45868251); // saddlbt z17.s, z18.h, z6.h
+ // vl128 state = 0xecea329d
+ __ dci(0x45868230); // saddlbt z16.s, z17.h, z6.h
+ // vl128 state = 0xa16880b8
+ __ dci(0x45868231); // saddlbt z17.s, z17.h, z6.h
+ // vl128 state = 0xcff73a01
+ __ dci(0x458c8235); // saddlbt z21.s, z17.h, z12.h
+ // vl128 state = 0xf6486b24
+ __ dci(0x458c8231); // saddlbt z17.s, z17.h, z12.h
+ // vl128 state = 0xa5612e07
+ __ dci(0x459c8021); // saddlbt z1.s, z1.h, z28.h
+ // vl128 state = 0xd71ab1e8
+ __ dci(0x458c8009); // saddlbt z9.s, z0.h, z12.h
+ // vl128 state = 0xaf74bd16
+ __ dci(0x459e800b); // saddlbt z11.s, z0.h, z30.h
+ // vl128 state = 0x96dee616
+ __ dci(0x45928003); // saddlbt z3.s, z0.h, z18.h
+ // vl128 state = 0x652e9cca
+ __ dci(0x45d28207); // saddlbt z7.d, z16.s, z18.s
+ // vl128 state = 0xc6b07290
+ __ dci(0x45da8225); // saddlbt z5.d, z17.s, z26.s
+ // vl128 state = 0x8c74a35d
+ __ dci(0x45da830d); // saddlbt z13.d, z24.s, z26.s
+ // vl128 state = 0xff620001
+ __ dci(0x45cb8309); // saddlbt z9.d, z24.s, z11.s
+ // vl128 state = 0x2147f374
+ __ dci(0x45ca8119); // saddlbt z25.d, z8.s, z10.s
+ // vl128 state = 0x6f961936
+ __ dci(0x45ce831d); // saddlbt z29.d, z24.s, z14.s
+ // vl128 state = 0xaa91e68a
+ __ dci(0x45ce8135); // saddlbt z21.d, z9.s, z14.s
+ // vl128 state = 0xa5635d0e
+ __ dci(0x458e8331); // saddlbt z17.s, z25.h, z14.h
+ // vl128 state = 0xa0705ea7
+ __ dci(0x458e8030); // saddlbt z16.s, z1.h, z14.h
+ // vl128 state = 0x397dc4d5
+ __ dci(0x458e8271); // saddlbt z17.s, z19.h, z14.h
+ // vl128 state = 0x5e975082
+ __ dci(0x458a82e1); // saddlbt z1.s, z23.h, z10.h
+ // vl128 state = 0x048f8dea
+ __ dci(0x458a8240); // saddlbt z0.s, z18.h, z10.h
+ // vl128 state = 0xd9104514
+ __ dci(0x458a8e50); // ssubltb z16.s, z18.h, z10.h
+ // vl128 state = 0x6afbf8b6
+ __ dci(0x45988e58); // ssubltb z24.s, z18.h, z24.h
+ // vl128 state = 0xfe44a2f8
+ __ dci(0x45d08e59); // ssubltb z25.d, z18.s, z16.s
+ // vl128 state = 0x050fb0ab
+ __ dci(0x45d08e58); // ssubltb z24.d, z18.s, z16.s
+ // vl128 state = 0xc9160f61
+ __ dci(0x45d08259); // saddlbt z25.d, z18.s, z16.s
+ // vl128 state = 0x70ae0c4a
+ __ dci(0x45d08b51); // ssublbt z17.d, z26.s, z16.s
+ // vl128 state = 0xe627770c
+ __ dci(0x45d08970); // ssublbt z16.d, z11.s, z16.s
+ // vl128 state = 0x445fd924
+ __ dci(0x45d28d74); // ssubltb z20.d, z11.s, z18.s
+ // vl128 state = 0x8c7dd6c0
+ __ dci(0x45c28d56); // ssubltb z22.d, z10.s, z2.s
+ // vl128 state = 0x925de210
+ __ dci(0x45c28d52); // ssubltb z18.d, z10.s, z2.s
+ // vl128 state = 0x28b67c05
+ __ dci(0x45c48d5a); // ssubltb z26.d, z10.s, z4.s
+ // vl128 state = 0x48e8377c
+ __ dci(0x45c18d5b); // ssubltb z27.d, z10.s, z1.s
+ // vl128 state = 0xb46af33e
+ __ dci(0x45818d13); // ssubltb z19.s, z8.h, z1.h
+ // vl128 state = 0x12fada0b
+ __ dci(0x45818d12); // ssubltb z18.s, z8.h, z1.h
+ // vl128 state = 0xeaeea3cd
+ __ dci(0x45858d9a); // ssubltb z26.s, z12.h, z5.h
+ // vl128 state = 0x6d466bd8
+ __ dci(0x45858df2); // ssubltb z18.s, z15.h, z5.h
+ // vl128 state = 0x60c67411
+ __ dci(0x45c58d62); // ssubltb z2.d, z11.s, z5.s
+ // vl128 state = 0xec3b40ed
+ __ dci(0x45c58b72); // ssublbt z18.d, z27.s, z5.s
+ // vl128 state = 0x5b421b0a
+ __ dci(0x45858a76); // ssublbt z22.s, z19.h, z5.h
+ // vl128 state = 0x8a0f26e9
+ __ dci(0x45878877); // ssublbt z23.s, z3.h, z7.h
+ // vl128 state = 0xc224293b
+ __ dci(0x458f8073); // saddlbt z19.s, z3.h, z15.h
+ // vl128 state = 0x9f5c0b50
+ __ dci(0x45878051); // saddlbt z17.s, z2.h, z7.h
+ // vl128 state = 0x2ae674c9
+ __ dci(0x45838841); // ssublbt z1.s, z2.h, z3.h
+ // vl128 state = 0x1dff4e20
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0x1dff4e20,
+ 0x3d2c11df,
+ 0x64caeccf,
+ 0x7940c227,
+ 0xf5f59485,
+ 0x7ad48c48,
+ 0xcde4523b,
+ 0xcb5849f0,
+ 0x1e7e9722,
+ 0x8049333f,
+ 0x40d95eb3,
+ 0x628a428d,
+ 0x1cf123f2,
+ 0x8d377510,
+ 0x44a03b91,
+ 0xabe90e98,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_sqabs_sqneg) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw SQABS/SQNEG (predicated
+ // saturating absolute value / negate) encodings via dci(). The
+ // "vl128 state" comments record the expected machine-state hash after
+ // each instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x4448b23a); // sqabs z26.h, p4/m, z17.h
+ // vl128 state = 0x4aadd589
+ __ dci(0x4448b23e); // sqabs z30.h, p4/m, z17.h
+ // vl128 state = 0x86da455e
+ __ dci(0x4448a21c); // sqabs z28.h, p0/m, z16.h
+ // vl128 state = 0x4eecab5c
+ __ dci(0x4408a298); // sqabs z24.b, p0/m, z20.b
+ // vl128 state = 0xf81ee16e
+ __ dci(0x4408a0dc); // sqabs z28.b, p0/m, z6.b
+ // vl128 state = 0x84b94ec5
+ __ dci(0x4408a0de); // sqabs z30.b, p0/m, z6.b
+ // vl128 state = 0x626db033
+ __ dci(0x4408a19c); // sqabs z28.b, p0/m, z12.b
+ // vl128 state = 0x181303a1
+ __ dci(0x4408a3d4); // sqabs z20.b, p0/m, z30.b
+ // vl128 state = 0xf4e93ff3
+ __ dci(0x4489a3dc); // sqneg z28.s, p0/m, z30.s
+ // vl128 state = 0xffe7a865
+ __ dci(0x4409a1d4); // sqneg z20.b, p0/m, z14.b
+ // vl128 state = 0x6a27d8fe
+ __ dci(0x4408a3d0); // sqabs z16.b, p0/m, z30.b
+ // vl128 state = 0x9ffc0414
+ __ dci(0x44c8a3d8); // sqabs z24.d, p0/m, z30.d
+ // vl128 state = 0xd59acd78
+ __ dci(0x44c8b3fa); // sqabs z26.d, p4/m, z31.d
+ // vl128 state = 0x8853f8ac
+ __ dci(0x44c8a2fb); // sqabs z27.d, p0/m, z23.d
+ // vl128 state = 0x439e9079
+ __ dci(0x44c8a2f9); // sqabs z25.d, p0/m, z23.d
+ // vl128 state = 0xbaaa56a6
+ __ dci(0x4488a2db); // sqabs z27.s, p0/m, z22.s
+ // vl128 state = 0x328cbd5a
+ __ dci(0x4488a2df); // sqabs z31.s, p0/m, z22.s
+ // vl128 state = 0x4a74b2da
+ __ dci(0x4488a2cf); // sqabs z15.s, p0/m, z22.s
+ // vl128 state = 0x52af62a6
+ __ dci(0x4488a04b); // sqabs z11.s, p0/m, z2.s
+ // vl128 state = 0xa45aef42
+ __ dci(0x4488a02f); // sqabs z15.s, p0/m, z1.s
+ // vl128 state = 0x0b5444ed
+ __ dci(0x4489a06d); // sqneg z13.s, p0/m, z3.s
+ // vl128 state = 0x6f0912d5
+ __ dci(0x4489a449); // sqneg z9.s, p1/m, z2.s
+ // vl128 state = 0x669ac78a
+ __ dci(0x4489a50b); // sqneg z11.s, p1/m, z8.s
+ // vl128 state = 0x58ae27ee
+ __ dci(0x4488a71b); // sqabs z27.s, p1/m, z24.s
+ // vl128 state = 0xa54925f9
+ __ dci(0x4408a519); // sqabs z25.b, p1/m, z8.b
+ // vl128 state = 0x45c13095
+ __ dci(0x4408a158); // sqabs z24.b, p0/m, z10.b
+ // vl128 state = 0x2d6d547a
+ __ dci(0x4488a168); // sqabs z8.s, p0/m, z11.s
+ // vl128 state = 0xc976b77b
+ __ dci(0x44c9a16c); // sqneg z12.d, p0/m, z11.d
+ // vl128 state = 0x766e750f
+ __ dci(0x44c9a17c); // sqneg z28.d, p0/m, z11.d
+ // vl128 state = 0xbf22858d
+ __ dci(0x44c9a878); // sqneg z24.d, p2/m, z3.d
+ // vl128 state = 0xe563a474
+ __ dci(0x44c9a8d9); // sqneg z25.d, p2/m, z6.d
+ // vl128 state = 0x573c2648
+ __ dci(0x44c9b85b); // sqneg z27.d, p6/m, z2.d
+ // vl128 state = 0x03cdf714
+ __ dci(0x4449b87f); // sqneg z31.h, p6/m, z3.h
+ // vl128 state = 0xff4e2cb1
+ __ dci(0x4449b81d); // sqneg z29.h, p6/m, z0.h
+ // vl128 state = 0xaab7065e
+ __ dci(0x4449a895); // sqneg z21.h, p2/m, z4.h
+ // vl128 state = 0x60d4a6d3
+ __ dci(0x4449a825); // sqneg z5.h, p2/m, z1.h
+ // vl128 state = 0x3bed34e4
+ __ dci(0x4449a821); // sqneg z1.h, p2/m, z1.h
+ // vl128 state = 0xaa750880
+ __ dci(0x4449a820); // sqneg z0.h, p2/m, z1.h
+ // vl128 state = 0xfca9d635
+ __ dci(0x4449a822); // sqneg z2.h, p2/m, z1.h
+ // vl128 state = 0x8a92f3e7
+ __ dci(0x4449ae23); // sqneg z3.h, p3/m, z17.h
+ // vl128 state = 0xc2db1ac5
+ __ dci(0x4449af73); // sqneg z19.h, p3/m, z27.h
+ // vl128 state = 0x386f5f27
+ __ dci(0x4449af77); // sqneg z23.h, p3/m, z27.h
+ // vl128 state = 0xff4fd505
+ __ dci(0x4489af67); // sqneg z7.s, p3/m, z27.s
+ // vl128 state = 0x4c897605
+ __ dci(0x4489ad25); // sqneg z5.s, p3/m, z9.s
+ // vl128 state = 0xcc73333a
+ __ dci(0x4409ad07); // sqneg z7.b, p3/m, z8.b
+ // vl128 state = 0x58d37b50
+ __ dci(0x4489ad85); // sqneg z5.s, p3/m, z12.s
+ // vl128 state = 0x2a142b9d
+ __ dci(0x44c9a984); // sqneg z4.d, p2/m, z12.d
+ // vl128 state = 0x006fd35a
+ __ dci(0x44c9a926); // sqneg z6.d, p2/m, z9.d
+ // vl128 state = 0x06c05c5d
+ __ dci(0x4449ab2e); // sqneg z14.h, p2/m, z25.h
+ // vl128 state = 0xe41a6fc4
+ __ dci(0x4449ab3e); // sqneg z30.h, p2/m, z25.h
+ // vl128 state = 0x6e574bec
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0x6e574bec,
+ 0xec677945,
+ 0xe7357ba7,
+ 0xbbf92859,
+ 0x3f42d943,
+ 0xe2db0bb1,
+ 0x704d1161,
+ 0xc0e1f809,
+ 0x887dd5e7,
+ 0x452b8b80,
+ 0xcf455511,
+ 0x821ad0bc,
+ 0xb98b1eac,
+ 0x49ae6871,
+ 0x16b2e0a6,
+ 0xaba4d260,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_urecpe_ursqrte) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw URECPE/URSQRTE (unsigned
+ // reciprocal / reciprocal square root estimate) encodings via dci().
+ // The "vl128 state" comments record the expected machine-state hash
+ // after each instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+ __ dci(0x4481bee8); // ursqrte z8.s, p7/m, z23.s
+ // vl128 state = 0x38c317d5
+ __ dci(0x4480bea9); // urecpe z9.s, p7/m, z21.s
+ // vl128 state = 0x8412e46d
+ __ dci(0x4481bfab); // ursqrte z11.s, p7/m, z29.s
+ // vl128 state = 0xae6c2805
+ __ dci(0x4481b9a3); // ursqrte z3.s, p6/m, z13.s
+ // vl128 state = 0x114331ab
+ __ dci(0x4481aba2); // ursqrte z2.s, p2/m, z29.s
+ // vl128 state = 0x88f2308d
+ __ dci(0x4480abe6); // urecpe z6.s, p2/m, z31.s
+ // vl128 state = 0x328b45b8
+ __ dci(0x4480afa2); // urecpe z2.s, p3/m, z29.s
+ // vl128 state = 0x7b67ded4
+ __ dci(0x4480ae23); // urecpe z3.s, p3/m, z17.s
+ // vl128 state = 0x48d1ac45
+ __ dci(0x4481aa27); // ursqrte z7.s, p2/m, z17.s
+ // vl128 state = 0x475f61b6
+ __ dci(0x4481a325); // ursqrte z5.s, p0/m, z25.s
+ // vl128 state = 0xfbf0b767
+ __ dci(0x4481a321); // ursqrte z1.s, p0/m, z25.s
+ // vl128 state = 0x31481484
+ __ dci(0x4481ab05); // ursqrte z5.s, p2/m, z24.s
+ // vl128 state = 0x5aca5e43
+ __ dci(0x4481a995); // ursqrte z21.s, p2/m, z12.s
+ // vl128 state = 0xe3b96378
+ __ dci(0x4481bb91); // ursqrte z17.s, p6/m, z28.s
+ // vl128 state = 0x9d469964
+ __ dci(0x4481b199); // ursqrte z25.s, p4/m, z12.s
+ // vl128 state = 0xbbabbb9d
+ __ dci(0x4481a989); // ursqrte z9.s, p2/m, z12.s
+ // vl128 state = 0xf83e651c
+ __ dci(0x4481b18b); // ursqrte z11.s, p4/m, z12.s
+ // vl128 state = 0x70a808da
+ __ dci(0x4480b089); // urecpe z9.s, p4/m, z4.s
+ // vl128 state = 0x427916ac
+ __ dci(0x4480b2c1); // urecpe z1.s, p4/m, z22.s
+ // vl128 state = 0xbf35be88
+ __ dci(0x4480aad1); // urecpe z17.s, p2/m, z22.s
+ // vl128 state = 0xaf69727b
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0xaf69727b,
+ 0x7fda1a01,
+ 0xd299e078,
+ 0x9a794a84,
+ 0x47a453c1,
+ 0xecc67cf0,
+ 0x04122ec2,
+ 0x82dd5669,
+ 0xcb2bb910,
+ 0xcc73c54c,
+ 0x4660030f,
+ 0x7c42b056,
+ 0x498a73b1,
+ 0x1de89fad,
+ 0x5411c616,
+ 0x9f378bac,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_arith_long) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw long add/subtract/
+ // absolute-difference encodings (S/UADDL[BT], S/USUBL[BT], S/UABDL[BT])
+ // via dci(). The "vl128 state" comments record the expected
+ // machine-state hash after each instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x45573eac); // uabdlt z12.h, z21.b, z23.b
+ // vl128 state = 0x2ee2e7d4
+ __ dci(0x45573c84); // uabdlt z4.h, z4.b, z23.b
+ // vl128 state = 0x33413c6f
+ __ dci(0x45571d8c); // usublt z12.h, z12.b, z23.b
+ // vl128 state = 0xb95ffb7e
+ __ dci(0x45971d8e); // usublt z14.s, z12.h, z23.h
+ // vl128 state = 0xac4d0015
+ __ dci(0x45d7158c); // ssublt z12.d, z12.s, z23.s
+ // vl128 state = 0xe5341703
+ __ dci(0x4557119c); // ssublb z28.h, z12.b, z23.b
+ // vl128 state = 0x744f8598
+ __ dci(0x45d5118c); // ssublb z12.d, z12.s, z21.s
+ // vl128 state = 0x120c8bf7
+ __ dci(0x45551088); // ssublb z8.h, z4.b, z21.b
+ // vl128 state = 0xbf53c9ed
+ __ dci(0x455410cc); // ssublb z12.h, z6.b, z20.b
+ // vl128 state = 0x2642a908
+ __ dci(0x454414c8); // ssublt z8.h, z6.b, z4.b
+ // vl128 state = 0x0682c7d0
+ __ dci(0x454510c9); // ssublb z9.h, z6.b, z5.b
+ // vl128 state = 0x1966420e
+ __ dci(0x455510ed); // ssublb z13.h, z7.b, z21.b
+ // vl128 state = 0xdd0ec707
+ __ dci(0x455508ef); // uaddlb z15.h, z7.b, z21.b
+ // vl128 state = 0x0756dbf9
+ __ dci(0x455502e7); // saddlb z7.h, z23.b, z21.b
+ // vl128 state = 0xb991e688
+ __ dci(0x455d06f7); // saddlt z23.h, z23.b, z29.b
+ // vl128 state = 0x55399de0
+ __ dci(0x455f06df); // saddlt z31.h, z22.b, z31.b
+ // vl128 state = 0x3379dce4
+ __ dci(0x45de06db); // saddlt z27.d, z22.s, z30.s
+ // vl128 state = 0xebf6b857
+ __ dci(0x45c606da); // saddlt z26.d, z22.s, z6.s
+ // vl128 state = 0x7625ec15
+ __ dci(0x45c306db); // saddlt z27.d, z22.s, z3.s
+ // vl128 state = 0x549988fd
+ __ dci(0x455306d3); // saddlt z19.h, z22.b, z19.b
+ // vl128 state = 0xb645cb0f
+ __ dci(0x455306d1); // saddlt z17.h, z22.b, z19.b
+ // vl128 state = 0x20a70427
+ __ dci(0x455306d3); // saddlt z19.h, z22.b, z19.b
+ // vl128 state = 0xd263ec78
+ __ dci(0x45510edb); // uaddlt z27.h, z22.b, z17.b
+ // vl128 state = 0xeecd9b44
+ __ dci(0x45510bdf); // uaddlb z31.h, z30.b, z17.b
+ // vl128 state = 0x0577c3d4
+ __ dci(0x45d10b4f); // uaddlb z15.d, z26.s, z17.s
+ // vl128 state = 0xca18b475
+ __ dci(0x45810b47); // uaddlb z7.s, z26.h, z1.h
+ // vl128 state = 0xdfe68417
+ __ dci(0x45811bc3); // usublb z3.s, z30.h, z1.h
+ // vl128 state = 0x96fe0360
+ __ dci(0x45891b82); // usublb z2.s, z28.h, z9.h
+ // vl128 state = 0x7e58a9d5
+ __ dci(0x4589398a); // uabdlb z10.s, z12.h, z9.h
+ // vl128 state = 0xd7612435
+ __ dci(0x458919ab); // usublb z11.s, z13.h, z9.h
+ // vl128 state = 0x8842dbca
+ __ dci(0x45cb19af); // usublb z15.d, z13.s, z11.s
+ // vl128 state = 0xfcac3d0f
+ __ dci(0x45cb19bf); // usublb z31.d, z13.s, z11.s
+ // vl128 state = 0x7b4952d6
+ __ dci(0x45cb190f); // usublb z15.d, z8.s, z11.s
+ // vl128 state = 0xb41cb8a3
+ __ dci(0x45cb1d8d); // usublt z13.d, z12.s, z11.s
+ // vl128 state = 0x9197543e
+ __ dci(0x45cb1d89); // usublt z9.d, z12.s, z11.s
+ // vl128 state = 0x3cc7e16c
+ __ dci(0x454b0d8b); // uaddlt z11.h, z12.b, z11.b
+ // vl128 state = 0x5c52744d
+ __ dci(0x45cb1d8a); // usublt z10.d, z12.s, z11.s
+ // vl128 state = 0x24c91c53
+ __ dci(0x454f1d8e); // usublt z14.h, z12.b, z15.b
+ // vl128 state = 0x0091f2f1
+ __ dci(0x455b1d8f); // usublt z15.h, z12.b, z27.b
+ // vl128 state = 0x521f94f7
+ __ dci(0x455a1c87); // usublt z7.h, z4.b, z26.b
+ // vl128 state = 0xa0631870
+ __ dci(0x454a1cb7); // usublt z23.h, z5.b, z10.b
+ // vl128 state = 0x089384c7
+ __ dci(0x454218a7); // usublb z7.h, z5.b, z2.b
+ // vl128 state = 0xe8c3c063
+ __ dci(0x454a19a6); // usublb z6.h, z13.b, z10.b
+ // vl128 state = 0x7a9f53ab
+ __ dci(0x454a3da2); // uabdlt z2.h, z13.b, z10.b
+ // vl128 state = 0x68d5f375
+ __ dci(0x45423ca6); // uabdlt z6.h, z5.b, z2.b
+ // vl128 state = 0x2c980ff7
+ __ dci(0x454a34a7); // sabdlt z7.h, z5.b, z10.b
+ // vl128 state = 0xe38196aa
+ __ dci(0x454a3466); // sabdlt z6.h, z3.b, z10.b
+ // vl128 state = 0x86c5bcb2
+ __ dci(0x454b146e); // ssublt z14.h, z3.b, z11.b
+ // vl128 state = 0xf8527375
+ __ dci(0x454b146a); // ssublt z10.h, z3.b, z11.b
+ // vl128 state = 0xf4bfb710
+ __ dci(0x454b147a); // ssublt z26.h, z3.b, z11.b
+ // vl128 state = 0xe1000ccf
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0xe1000ccf,
+ 0xd320fd27,
+ 0x356a62d9,
+ 0xc6245994,
+ 0x78aeec8a,
+ 0xb5d0402b,
+ 0x06684b9e,
+ 0x6033f51d,
+ 0xd174ee86,
+ 0x80baaecc,
+ 0x2c9b263c,
+ 0x3fba551a,
+ 0x489fb8b7,
+ 0x862c9b27,
+ 0xc0549096,
+ 0xa927d570,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_arith_wide) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // Generated test: replay a fixed stream of raw wide add/subtract
+ // encodings (S/UADDW[BT], S/USUBW[BT]) via dci(). The "vl128 state"
+ // comments record the expected machine-state hash after each
+ // instruction for a 128-bit vector length.
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x45494683); // saddwt z3.h, z20.h, z9.b
+ // vl128 state = 0x9a3fc71a
+ __ dci(0x45494687); // saddwt z7.h, z20.h, z9.b
+ // vl128 state = 0xb016cb2f
+ __ dci(0x454b46d7); // saddwt z23.h, z22.h, z11.b
+ // vl128 state = 0x5ce3d8a0
+ __ dci(0x455b56d5); // ssubwt z21.h, z22.h, z27.b
+ // vl128 state = 0xbace5453
+ __ dci(0x455b567d); // ssubwt z29.h, z19.h, z27.b
+ // vl128 state = 0x1f510928
+ __ dci(0x455b506d); // ssubwb z13.h, z3.h, z27.b
+ // vl128 state = 0x19ea553e
+ __ dci(0x4559502f); // ssubwb z15.h, z1.h, z25.b
+ // vl128 state = 0x4d88e5db
+ __ dci(0x45d95427); // ssubwt z7.d, z1.d, z25.s
+ // vl128 state = 0x069804b6
+ __ dci(0x45d95426); // ssubwt z6.d, z1.d, z25.s
+ // vl128 state = 0xfe46cf10
+ __ dci(0x45db5c36); // usubwt z22.d, z1.d, z27.s
+ // vl128 state = 0xad3c8120
+ __ dci(0x45d95d37); // usubwt z23.d, z9.d, z25.s
+ // vl128 state = 0x833d76fb
+ __ dci(0x45d55d27); // usubwt z7.d, z9.d, z21.s
+ // vl128 state = 0xc536845d
+ __ dci(0x45d44d25); // uaddwt z5.d, z9.d, z20.s
+ // vl128 state = 0x21f5a29c
+ __ dci(0x45dc4927); // uaddwb z7.d, z9.d, z28.s
+ // vl128 state = 0xfe67da2a
+ __ dci(0x455c490f); // uaddwb z15.h, z8.h, z28.b
+ // vl128 state = 0x5ec5d506
+ __ dci(0x455c490b); // uaddwb z11.h, z8.h, z28.b
+ // vl128 state = 0x74b7d2fc
+ __ dci(0x45584923); // uaddwb z3.h, z9.h, z24.b
+ // vl128 state = 0xa785f3c3
+ __ dci(0x45584922); // uaddwb z2.h, z9.h, z24.b
+ // vl128 state = 0x373049c0
+ __ dci(0x45584940); // uaddwb z0.h, z10.h, z24.b
+ // vl128 state = 0xbf385483
+ __ dci(0x45da4944); // uaddwb z4.d, z10.d, z26.s
+ // vl128 state = 0x94cd3b86
+ __ dci(0x45524945); // uaddwb z5.h, z10.h, z18.b
+ // vl128 state = 0x8535094f
+ __ dci(0x4540494d); // uaddwb z13.h, z10.h, z0.b
+ // vl128 state = 0x328abbdb
+ __ dci(0x45c04909); // uaddwb z9.d, z8.d, z0.s
+ // vl128 state = 0x253064cb
+ __ dci(0x45c8498d); // uaddwb z13.d, z12.d, z8.s
+ // vl128 state = 0xa1b39fe0
+ __ dci(0x45c0418f); // saddwb z15.d, z12.d, z0.s
+ // vl128 state = 0xa72048d9
+ __ dci(0x45d84187); // saddwb z7.d, z12.d, z24.s
+ // vl128 state = 0x4c8a23ac
+ __ dci(0x45dc5197); // ssubwb z23.d, z12.d, z28.s
+ // vl128 state = 0x352a3d60
+ __ dci(0x45dc5d93); // usubwt z19.d, z12.d, z28.s
+ // vl128 state = 0x404b9e8b
+ __ dci(0x45dd5592); // ssubwt z18.d, z12.d, z29.s
+ // vl128 state = 0xf46cc758
+ __ dci(0x45dd5550); // ssubwt z16.d, z10.d, z29.s
+ // vl128 state = 0x171ebd36
+ __ dci(0x45cd55d4); // ssubwt z20.d, z14.d, z13.s
+ // vl128 state = 0x4f2ef46f
+ __ dci(0x45dd5dd5); // usubwt z21.d, z14.d, z29.s
+ // vl128 state = 0x0c9ab301
+ __ dci(0x45dd5dc5); // usubwt z5.d, z14.d, z29.s
+ // vl128 state = 0x67a10e22
+ __ dci(0x454d5dd5); // usubwt z21.h, z14.h, z13.b
+ // vl128 state = 0xb4bd21c0
+ __ dci(0x454d4dfd); // uaddwt z29.h, z15.h, z13.b
+ // vl128 state = 0x8df5f90f
+ __ dci(0x45494fed); // uaddwt z13.h, z31.h, z9.b
+ // vl128 state = 0x913f7aa4
+ __ dci(0x45cb4fef); // uaddwt z15.d, z31.d, z11.s
+ // vl128 state = 0xa23d1307
+ __ dci(0x454b47ff); // saddwt z31.h, z31.h, z11.b
+ // vl128 state = 0x026ff306
+ __ dci(0x454747f7); // saddwt z23.h, z31.h, z7.b
+ // vl128 state = 0x9abf0566
+ __ dci(0x45c743f6); // saddwb z22.d, z31.d, z7.s
+ // vl128 state = 0x27031d0e
+ __ dci(0x45c74b66); // uaddwb z6.d, z27.d, z7.s
+ // vl128 state = 0xc6f3a976
+ __ dci(0x45474be4); // uaddwb z4.h, z31.h, z7.b
+ // vl128 state = 0xededea24
+ __ dci(0x454349e0); // uaddwb z0.h, z15.h, z3.b
+ // vl128 state = 0xf1092d40
+ __ dci(0x454359c1); // usubwb z1.h, z14.h, z3.b
+ // vl128 state = 0x2d96f026
+ __ dci(0x45535983); // usubwb z3.h, z12.h, z19.b
+ // vl128 state = 0x5a9cab0c
+ __ dci(0x45535981); // usubwb z1.h, z12.h, z19.b
+ // vl128 state = 0x7f8d695f
+ __ dci(0x45535a83); // usubwb z3.h, z20.h, z19.b
+ // vl128 state = 0xb0ae0f62
+ __ dci(0x45d35e81); // usubwt z1.d, z20.d, z19.s
+ // vl128 state = 0xfe7e227b
+ __ dci(0x45d25ec9); // usubwt z9.d, z22.d, z18.s
+ // vl128 state = 0xed9dd734
+ __ dci(0x45d35e88); // usubwt z8.d, z20.d, z19.s
+ // vl128 state = 0x943f8d24
+ }
+
+ // Hash the final machine state (presumably CRC32-based, since the test
+ // requires kCRC32 — confirm in ComputeMachineStateHash) and load the
+ // 32-bit result into w0 for the check below.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per hardware vector length, indexed by the number
+ // of 128-bit (Q-register-sized) lanes minus one.
+ uint32_t expected_hashes[] = {
+ 0x943f8d24,
+ 0xfe956248,
+ 0xfefddb40,
+ 0x4d92bfb3,
+ 0x01dcd5b1,
+ 0x29a23c92,
+ 0xb7587530,
+ 0xa56fa28c,
+ 0xa0f8590d,
+ 0xa6b883a4,
+ 0x2e50d1fd,
+ 0x8e976f55,
+ 0xb21bd3b1,
+ 0x0c3586e5,
+ 0xe3d7e7e6,
+ 0xb1e0e34f,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_shift_long) {
+ // Executes a fixed, generated sequence of 50 raw SVE2 "shift long" encodings
+ // (sshllb/sshllt/ushllb/ushllt), then hashes the machine state with
+ // ComputeMachineStateHash and compares against a precomputed hash for the
+ // current vector length. The `vl128 state` comments record the running hash
+ // after each instruction for the 128-bit-VL case.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x4518aafc); // ushllb z28.s, z23.h, #8
+ // vl128 state = 0x07dfb216
+ __ dci(0x4518afec); // ushllt z12.s, z31.h, #8
+ // vl128 state = 0xe3c5d68c
+ __ dci(0x4518adc4); // ushllt z4.s, z14.h, #8
+ // vl128 state = 0xce8721fc
+ __ dci(0x4518a1c5); // sshllb z5.s, z14.h, #8
+ // vl128 state = 0x71820bae
+ __ dci(0x4508a9cd); // ushllb z13.h, z14.b, #0
+ // vl128 state = 0xfdc3f7b3
+ __ dci(0x4508ad9d); // ushllt z29.h, z12.b, #0
+ // vl128 state = 0x93c1f606
+ __ dci(0x4508a795); // sshllt z21.h, z28.b, #0
+ // vl128 state = 0x15ebcb72
+ __ dci(0x450caf94); // ushllt z20.h, z28.b, #4
+ // vl128 state = 0x76c630f5
+ __ dci(0x4508afd6); // ushllt z22.h, z30.b, #0
+ // vl128 state = 0xa9c6dfbc
+ __ dci(0x4509aed7); // ushllt z23.h, z22.b, #1
+ // vl128 state = 0xa5942073
+ __ dci(0x4508ae55); // ushllt z21.h, z18.b, #0
+ // vl128 state = 0xe4348777
+ __ dci(0x450cac51); // ushllt z17.h, z2.b, #4
+ // vl128 state = 0x91c6e6ea
+ __ dci(0x450ca870); // ushllb z16.h, z3.b, #4
+ // vl128 state = 0x40393ae8
+ __ dci(0x450ca031); // sshllb z17.h, z1.b, #4
+ // vl128 state = 0x8b9526e8
+ __ dci(0x450aa030); // sshllb z16.h, z1.b, #2
+ // vl128 state = 0xd3d0857a
+ __ dci(0x450aa031); // sshllb z17.h, z1.b, #2
+ // vl128 state = 0xbdd18de2
+ __ dci(0x450ba233); // sshllb z19.h, z17.b, #3
+ // vl128 state = 0x5e5f6f2a
+ __ dci(0x4509a263); // sshllb z3.h, z19.b, #1
+ // vl128 state = 0xa3b5427b
+ __ dci(0x450da673); // sshllt z19.h, z19.b, #5
+ // vl128 state = 0x97472b22
+ __ dci(0x451da477); // sshllt z23.s, z3.h, #13
+ // vl128 state = 0xe6da4012
+ __ dci(0x451da5f6); // sshllt z22.s, z15.h, #13
+ // vl128 state = 0x11630552
+ __ dci(0x450da5b4); // sshllt z20.h, z13.b, #5
+ // vl128 state = 0xe9a4cad0
+ __ dci(0x450da5d5); // sshllt z21.h, z14.b, #5
+ // vl128 state = 0x750d4143
+ __ dci(0x450fa4d7); // sshllt z23.h, z6.b, #7
+ // vl128 state = 0xc441984c
+ __ dci(0x451ba4df); // sshllt z31.s, z6.h, #11
+ // vl128 state = 0x9a3899af
+ __ dci(0x451ba4db); // sshllt z27.s, z6.h, #11
+ // vl128 state = 0xbb6684bb
+ __ dci(0x451ba4bf); // sshllt z31.s, z5.h, #11
+ // vl128 state = 0x45a2cf1e
+ __ dci(0x451aa49b); // sshllt z27.s, z4.h, #10
+ // vl128 state = 0xac10df2f
+ __ dci(0x451aa49f); // sshllt z31.s, z4.h, #10
+ // vl128 state = 0x9cecdbd8
+ __ dci(0x451aa89b); // ushllb z27.s, z4.h, #10
+ // vl128 state = 0x73fca806
+ __ dci(0x4518aa9f); // ushllb z31.s, z20.h, #8
+ // vl128 state = 0xf58883fb
+ __ dci(0x451aaab7); // ushllb z23.s, z21.h, #10
+ // vl128 state = 0xf9476b16
+ __ dci(0x4508aaa7); // ushllb z7.h, z21.b, #0
+ // vl128 state = 0x6f65ea0e
+ __ dci(0x4508ae2f); // ushllt z15.h, z17.b, #0
+ // vl128 state = 0x574341e2
+ __ dci(0x4509ac27); // ushllt z7.h, z1.b, #1
+ // vl128 state = 0xe373d23c
+ __ dci(0x450dae25); // ushllt z5.h, z17.b, #5
+ // vl128 state = 0xc6ad882b
+ __ dci(0x4509aea7); // ushllt z7.h, z21.b, #1
+ // vl128 state = 0xfce8617d
+ __ dci(0x4509adb7); // ushllt z23.h, z13.b, #1
+ // vl128 state = 0x30f63baf
+ __ dci(0x4549ade7); // ushllt z7.d, z15.s, #9
+ // vl128 state = 0x20522e02
+ __ dci(0x4549adf7); // ushllt z23.d, z15.s, #9
+ // vl128 state = 0x18c6aade
+ __ dci(0x4548aff6); // ushllt z22.d, z31.s, #8
+ // vl128 state = 0x3ad49ec9
+ __ dci(0x4548affe); // ushllt z30.d, z31.s, #8
+ // vl128 state = 0x828be22f
+ __ dci(0x4548adda); // ushllt z26.d, z14.s, #8
+ // vl128 state = 0xb4997aa9
+ __ dci(0x4544add2); // ushllt z18.d, z14.s, #4
+ // vl128 state = 0x6e7feb55
+ __ dci(0x454cad42); // ushllt z2.d, z10.s, #12
+ // vl128 state = 0xb8ff410d
+ __ dci(0x450dad40); // ushllt z0.h, z10.b, #5
+ // vl128 state = 0x806bb38f
+ __ dci(0x4515ad50); // ushllt z16.s, z10.h, #5
+ // vl128 state = 0x6bd247ad
+ __ dci(0x4557ad51); // ushllt z17.d, z10.s, #23
+ // vl128 state = 0xc0959f27
+ __ dci(0x4557ad41); // ushllt z1.d, z10.s, #23
+ // vl128 state = 0xf0176482
+ __ dci(0x4557ad40); // ushllt z0.d, z10.s, #23
+ // vl128 state = 0xd5c958bf
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length, indexed by the number of
+ // 128-bit (Q-sized) lanes minus one (VL 128..2048 bits).
+ uint32_t expected_hashes[] = {
+ 0xd5c958bf,
+ 0xb7546431,
+ 0xee4f6b9f,
+ 0x74f31aeb,
+ 0x98282a7a,
+ 0xf2423509,
+ 0xe3ae7c5c,
+ 0xe544e7ba,
+ 0x7d52fba5,
+ 0x1520b68d,
+ 0xee539501,
+ 0x1a65ba45,
+ 0x0d4c2383,
+ 0x9f4a30c5,
+ 0xca6662a2,
+ 0x64dc5f23,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_shift_narrow) {
+ // Executes a fixed, generated sequence of 50 raw SVE2 "shift narrow"
+ // encodings (shrnb/shrnt/rshrnb/rshrnt), then hashes the machine state and
+ // compares against a precomputed hash for the current vector length. The
+ // `vl128 state` comments record the running hash for the 128-bit-VL case.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x456b1458); // shrnt z24.s, z2.d, #21
+ // vl128 state = 0x70323182
+ __ dci(0x456b145c); // shrnt z28.s, z2.d, #21
+ // vl128 state = 0x1d620da3
+ __ dci(0x45291454); // shrnt z20.b, z2.h, #7
+ // vl128 state = 0x8e6d3a55
+ __ dci(0x4539141c); // shrnt z28.h, z0.s, #7
+ // vl128 state = 0xbc19c1cc
+ __ dci(0x453914b8); // shrnt z24.h, z5.s, #7
+ // vl128 state = 0x0bd4d1e8
+ __ dci(0x453b14f9); // shrnt z25.h, z7.s, #5
+ // vl128 state = 0x15622295
+ __ dci(0x453315fd); // shrnt z29.h, z15.s, #13
+ // vl128 state = 0x45bf3b94
+ __ dci(0x45331d75); // rshrnt z21.h, z11.s, #13
+ // vl128 state = 0xbb3574e6
+ __ dci(0x45331945); // rshrnb z5.h, z10.s, #13
+ // vl128 state = 0x7b72be5f
+ __ dci(0x45331941); // rshrnb z1.h, z10.s, #13
+ // vl128 state = 0x073cdf1a
+ __ dci(0x45331949); // rshrnb z9.h, z10.s, #13
+ // vl128 state = 0x3ecd1bf9
+ __ dci(0x453b1979); // rshrnb z25.h, z11.s, #5
+ // vl128 state = 0x19f7734e
+ __ dci(0x453b11f1); // shrnb z17.h, z15.s, #5
+ // vl128 state = 0x47a3f036
+ __ dci(0x453711f9); // shrnb z25.h, z15.s, #9
+ // vl128 state = 0xff283fe4
+ __ dci(0x453315f8); // shrnt z24.h, z15.s, #13
+ // vl128 state = 0x1c19f8fb
+ __ dci(0x453319f0); // rshrnb z16.h, z15.s, #13
+ // vl128 state = 0x3be08052
+ __ dci(0x453b1972); // rshrnb z18.h, z11.s, #5
+ // vl128 state = 0xc5ae76a0
+ __ dci(0x453b1962); // rshrnb z2.h, z11.s, #5
+ // vl128 state = 0x75ec3872
+ __ dci(0x453b1c60); // rshrnt z0.h, z3.s, #5
+ // vl128 state = 0x9b372229
+ __ dci(0x45331c44); // rshrnt z4.h, z2.s, #13
+ // vl128 state = 0xe4e22904
+ __ dci(0x45371c0c); // rshrnt z12.h, z0.s, #9
+ // vl128 state = 0x12bc6f4b
+ __ dci(0x45331d08); // rshrnt z8.h, z8.s, #13
+ // vl128 state = 0x3ef95245
+ __ dci(0x45331c98); // rshrnt z24.h, z4.s, #13
+ // vl128 state = 0x0a4a0d68
+ __ dci(0x45731e99); // rshrnt z25.s, z20.d, #13
+ // vl128 state = 0xa01ca6c8
+ __ dci(0x457b1a98); // rshrnb z24.s, z20.d, #5
+ // vl128 state = 0x73a50e30
+ __ dci(0x452b1a9c); // rshrnb z28.b, z20.h, #5
+ // vl128 state = 0xbad3deda
+ __ dci(0x452b1818); // rshrnb z24.b, z0.h, #5
+ // vl128 state = 0x579b3c8f
+ __ dci(0x452b181a); // rshrnb z26.b, z0.h, #5
+ // vl128 state = 0xa2b0bf7c
+ __ dci(0x452b181b); // rshrnb z27.b, z0.h, #5
+ // vl128 state = 0x7bebdf9e
+ __ dci(0x45291a1a); // rshrnb z26.b, z16.h, #7
+ // vl128 state = 0x3f90e1b7
+ __ dci(0x45681a12); // rshrnb z18.s, z16.d, #24
+ // vl128 state = 0x57e6295e
+ __ dci(0x45681290); // shrnb z16.s, z20.d, #24
+ // vl128 state = 0xa53f48b5
+ __ dci(0x45281091); // shrnb z17.b, z4.h, #8
+ // vl128 state = 0x65179ab4
+ __ dci(0x45281401); // shrnt z1.b, z0.h, #8
+ // vl128 state = 0x3cc490ba
+ __ dci(0x45281c83); // rshrnt z3.b, z4.h, #8
+ // vl128 state = 0x3bc34e69
+ __ dci(0x45281c93); // rshrnt z19.b, z4.h, #8
+ // vl128 state = 0x6dded0bb
+ __ dci(0x45681cb7); // rshrnt z23.s, z5.d, #24
+ // vl128 state = 0x378f83c0
+ __ dci(0x45291cb6); // rshrnt z22.b, z5.h, #7
+ // vl128 state = 0x7e4d1c44
+ __ dci(0x45391eb2); // rshrnt z18.h, z21.s, #7
+ // vl128 state = 0x66c0b784
+ __ dci(0x45281ea2); // rshrnt z2.b, z21.h, #8
+ // vl128 state = 0x62df2c82
+ __ dci(0x452c1fa0); // rshrnt z0.b, z29.h, #4
+ // vl128 state = 0xd79ee307
+ __ dci(0x456c1ba2); // rshrnb z2.s, z29.d, #20
+ // vl128 state = 0x8ebb2251
+ __ dci(0x45641ab2); // rshrnb z18.s, z21.d, #28
+ // vl128 state = 0x77ec053a
+ __ dci(0x456c12ba); // shrnb z26.s, z21.d, #20
+ // vl128 state = 0xcf94b608
+ __ dci(0x452812b8); // shrnb z24.b, z21.h, #8
+ // vl128 state = 0x3e067a62
+ __ dci(0x4568123a); // shrnb z26.s, z17.d, #24
+ // vl128 state = 0xe451de0f
+ __ dci(0x456c1338); // shrnb z24.s, z25.d, #20
+ // vl128 state = 0x4042d707
+ __ dci(0x456813b9); // shrnb z25.s, z29.d, #24
+ // vl128 state = 0x5184a2aa
+ __ dci(0x456812e9); // shrnb z9.s, z23.d, #24
+ // vl128 state = 0x246344b8
+ __ dci(0x456812e1); // shrnb z1.s, z23.d, #24
+ // vl128 state = 0x76866e79
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length, indexed by the number of
+ // 128-bit (Q-sized) lanes minus one (VL 128..2048 bits).
+ uint32_t expected_hashes[] = {
+ 0x76866e79,
+ 0x42b52927,
+ 0x84a0bfcc,
+ 0xf8226fc2,
+ 0x444f6df5,
+ 0x2f8dcd68,
+ 0x5a48278a,
+ 0x1cdd7f2f,
+ 0x7816d36c,
+ 0xebae972f,
+ 0xa02adfbe,
+ 0xc93cde0f,
+ 0xce43287b,
+ 0x777d6ce0,
+ 0x9d3be904,
+ 0x3e059dd2,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_shift_narrow_usat) {
+ // Executes a fixed, generated sequence of 50 raw SVE2 unsigned saturating
+ // "shift narrow" encodings (uqshrnb/uqshrnt/uqrshrnb/uqrshrnt), then hashes
+ // the machine state and compares against a precomputed hash for the current
+ // vector length. `vl128 state` comments track the 128-bit-VL running hash.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x457a3207); // uqshrnb z7.s, z16.d, #6
+ // vl128 state = 0x4b40d14e
+ __ dci(0x457a3206); // uqshrnb z6.s, z16.d, #6
+ // vl128 state = 0x4dbc0377
+ __ dci(0x457a3204); // uqshrnb z4.s, z16.d, #6
+ // vl128 state = 0xa6fbc7f9
+ __ dci(0x457e3a14); // uqrshrnb z20.s, z16.d, #2
+ // vl128 state = 0x9e9414a9
+ __ dci(0x457b3a15); // uqrshrnb z21.s, z16.d, #5
+ // vl128 state = 0xe8824afd
+ __ dci(0x457b3ab7); // uqrshrnb z23.s, z21.d, #5
+ // vl128 state = 0x81ce1be6
+ __ dci(0x457b3ab6); // uqrshrnb z22.s, z21.d, #5
+ // vl128 state = 0x5e343a1e
+ __ dci(0x457f3af7); // uqrshrnb z23.s, z23.d, #1
+ // vl128 state = 0x09a5c3a0
+ __ dci(0x457b38ff); // uqrshrnb z31.s, z7.d, #5
+ // vl128 state = 0xb50710bf
+ __ dci(0x453338fe); // uqrshrnb z30.h, z7.s, #13
+ // vl128 state = 0xfc719c85
+ __ dci(0x453338ee); // uqrshrnb z14.h, z7.s, #13
+ // vl128 state = 0x157d826a
+ __ dci(0x453b386a); // uqrshrnb z10.h, z3.s, #5
+ // vl128 state = 0x9c735771
+ __ dci(0x452f386e); // uqrshrnb z14.b, z3.h, #1
+ // vl128 state = 0xe03bb4a4
+ __ dci(0x452f3aea); // uqrshrnb z10.b, z23.h, #1
+ // vl128 state = 0xa841b415
+ __ dci(0x452f38ba); // uqrshrnb z26.b, z5.h, #1
+ // vl128 state = 0x55302a6d
+ __ dci(0x452f3878); // uqrshrnb z24.b, z3.h, #1
+ // vl128 state = 0x73bee182
+ __ dci(0x453f385c); // uqrshrnb z28.h, z2.s, #1
+ // vl128 state = 0x75f81ccc
+ __ dci(0x453f397d); // uqrshrnb z29.h, z11.s, #1
+ // vl128 state = 0x856fecc9
+ __ dci(0x457d397c); // uqrshrnb z28.s, z11.d, #3
+ // vl128 state = 0x4b144bf2
+ __ dci(0x457f3878); // uqrshrnb z24.s, z3.d, #1
+ // vl128 state = 0x7ea5dad3
+ __ dci(0x457b3c7a); // uqrshrnt z26.s, z3.d, #5
+ // vl128 state = 0xa7d48543
+ __ dci(0x45633c72); // uqrshrnt z18.s, z3.d, #29
+ // vl128 state = 0x18f647a7
+ __ dci(0x45613d76); // uqrshrnt z22.s, z11.d, #31
+ // vl128 state = 0x96d4081b
+ __ dci(0x45693972); // uqrshrnb z18.s, z11.d, #23
+ // vl128 state = 0xa8369e83
+ __ dci(0x45693d53); // uqrshrnt z19.s, z10.d, #23
+ // vl128 state = 0x7553ff55
+ __ dci(0x45713d51); // uqrshrnt z17.s, z10.d, #15
+ // vl128 state = 0x52a52ecc
+ __ dci(0x45713d99); // uqrshrnt z25.s, z12.d, #15
+ // vl128 state = 0x4de78f7b
+ __ dci(0x45753f9d); // uqrshrnt z29.s, z28.d, #11
+ // vl128 state = 0x0f8948cd
+ __ dci(0x45753f8d); // uqrshrnt z13.s, z28.d, #11
+ // vl128 state = 0x7f2c1b05
+ __ dci(0x45753685); // uqshrnt z5.s, z20.d, #11
+ // vl128 state = 0xbe6f6ea9
+ __ dci(0x457d3784); // uqshrnt z4.s, z28.d, #3
+ // vl128 state = 0x716e1acd
+ __ dci(0x453c3785); // uqshrnt z5.h, z28.s, #4
+ // vl128 state = 0x828a3cbb
+ __ dci(0x453837a4); // uqshrnt z4.h, z29.s, #8
+ // vl128 state = 0x125ddc3c
+ __ dci(0x457a37a6); // uqshrnt z6.s, z29.d, #6
+ // vl128 state = 0x8c5c5d4c
+ __ dci(0x453a37e4); // uqshrnt z4.h, z31.s, #6
+ // vl128 state = 0xdea9801f
+ __ dci(0x453f37ec); // uqshrnt z12.h, z31.s, #1
+ // vl128 state = 0x6caa6537
+ __ dci(0x457f37dc); // uqshrnt z28.s, z30.d, #1
+ // vl128 state = 0x66c0c05d
+ __ dci(0x45773fde); // uqrshrnt z30.s, z30.d, #9
+ // vl128 state = 0xf8d495e2
+ __ dci(0x45653fda); // uqrshrnt z26.s, z30.d, #27
+ // vl128 state = 0xb543c017
+ __ dci(0x45613ffb); // uqrshrnt z27.s, z31.d, #31
+ // vl128 state = 0x58a69fb4
+ __ dci(0x45613feb); // uqrshrnt z11.s, z31.d, #31
+ // vl128 state = 0xb5a04d48
+ __ dci(0x45653fca); // uqrshrnt z10.s, z30.d, #27
+ // vl128 state = 0xd2d445e0
+ __ dci(0x45753fe8); // uqrshrnt z8.s, z31.d, #11
+ // vl128 state = 0x67d89d28
+ __ dci(0x457537ca); // uqshrnt z10.s, z30.d, #11
+ // vl128 state = 0xcaa2b6dc
+ __ dci(0x457d35ce); // uqshrnt z14.s, z14.d, #3
+ // vl128 state = 0x9da6b10f
+ __ dci(0x452d35de); // uqshrnt z30.b, z14.h, #3
+ // vl128 state = 0xda8663db
+ __ dci(0x452d314e); // uqshrnb z14.b, z10.h, #3
+ // vl128 state = 0x761992a9
+ __ dci(0x453d304f); // uqshrnb z15.h, z2.s, #3
+ // vl128 state = 0x71587e6a
+ __ dci(0x453d386e); // uqrshrnb z14.h, z3.s, #3
+ // vl128 state = 0xc6118398
+ __ dci(0x453538ec); // uqrshrnb z12.h, z7.s, #11
+ // vl128 state = 0x5e542c3a
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length, indexed by the number of
+ // 128-bit (Q-sized) lanes minus one (VL 128..2048 bits).
+ uint32_t expected_hashes[] = {
+ 0x5e542c3a,
+ 0xd9128c5a,
+ 0x73f430ed,
+ 0x160c07da,
+ 0x7bff9561,
+ 0x4b2d6335,
+ 0x3738197c,
+ 0x2b624a48,
+ 0xbb257999,
+ 0x0d5d8614,
+ 0xb031d1fc,
+ 0x60f2fce2,
+ 0x92770ad6,
+ 0x6e33aa78,
+ 0x8752089b,
+ 0x37b56a40,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_shift_narrow_ssat) {
+ // Executes a fixed, generated sequence of 50 raw SVE2 signed saturating
+ // "shift narrow" encodings (sqshrn*/sqrshrn*/sqshrun*/sqrshrun*), then
+ // hashes the machine state and compares against a precomputed hash for the
+ // current vector length. `vl128 state` comments track the 128-bit-VL hash.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x456c0875); // sqrshrunb z21.s, z3.d, #20
+ // vl128 state = 0x1446427d
+ __ dci(0x456c0877); // sqrshrunb z23.s, z3.d, #20
+ // vl128 state = 0xd839ea94
+ __ dci(0x456c0876); // sqrshrunb z22.s, z3.d, #20
+ // vl128 state = 0xe4dd3104
+ __ dci(0x456e0c77); // sqrshrunt z23.s, z3.d, #18
+ // vl128 state = 0xd86dd8aa
+ __ dci(0x456e0a73); // sqrshrunb z19.s, z19.d, #18
+ // vl128 state = 0x7aacf973
+ __ dci(0x456c0e72); // sqrshrunt z18.s, z19.d, #20
+ // vl128 state = 0x6e7b28b8
+ __ dci(0x456c2c62); // sqrshrnt z2.s, z3.d, #20
+ // vl128 state = 0x242e0a5e
+ __ dci(0x456c24f2); // sqshrnt z18.s, z7.d, #20
+ // vl128 state = 0xf9c993ec
+ __ dci(0x456c2570); // sqshrnt z16.s, z11.d, #20
+ // vl128 state = 0x087c4fc1
+ __ dci(0x456e2478); // sqshrnt z24.s, z3.d, #18
+ // vl128 state = 0x33fdae0c
+ __ dci(0x456e2c30); // sqrshrnt z16.s, z1.d, #18
+ // vl128 state = 0x0c957ea2
+ __ dci(0x456e2d78); // sqrshrnt z24.s, z11.d, #18
+ // vl128 state = 0x0792e58a
+ __ dci(0x456f2970); // sqrshrnb z16.s, z11.d, #17
+ // vl128 state = 0xe7169693
+ __ dci(0x456b2938); // sqrshrnb z24.s, z9.d, #21
+ // vl128 state = 0x1372a92d
+ __ dci(0x45692979); // sqrshrnb z25.s, z11.d, #23
+ // vl128 state = 0xc1c31387
+ __ dci(0x4563297d); // sqrshrnb z29.s, z11.d, #29
+ // vl128 state = 0x50a08538
+ __ dci(0x45632975); // sqrshrnb z21.s, z11.d, #29
+ // vl128 state = 0xda962f25
+ __ dci(0x456309f1); // sqrshrunb z17.s, z15.d, #29
+ // vl128 state = 0xe149814e
+ __ dci(0x457308f3); // sqrshrunb z19.s, z7.d, #13
+ // vl128 state = 0x6d5ea38b
+ __ dci(0x457329fb); // sqrshrnb z27.s, z15.d, #13
+ // vl128 state = 0xee932acb
+ __ dci(0x457721f3); // sqshrnb z19.s, z15.d, #9
+ // vl128 state = 0x7e05914b
+ __ dci(0x45732171); // sqshrnb z17.s, z11.d, #13
+ // vl128 state = 0xe4bf82a4
+ __ dci(0x45722070); // sqshrnb z16.s, z3.d, #14
+ // vl128 state = 0xdfc01530
+ __ dci(0x456a2078); // sqshrnb z24.s, z3.d, #22
+ // vl128 state = 0x6b48fc15
+ __ dci(0x452a287c); // sqrshrnb z28.b, z3.h, #6
+ // vl128 state = 0x45e86048
+ __ dci(0x45282c78); // sqrshrnt z24.b, z3.h, #8
+ // vl128 state = 0xb8dc83dd
+ __ dci(0x45602c68); // sqrshrnt z8.s, z3.d, #32
+ // vl128 state = 0xda536cf8
+ __ dci(0x45602678); // sqshrnt z24.s, z19.d, #32
+ // vl128 state = 0xb548f79b
+ __ dci(0x45682e70); // sqrshrnt z16.s, z19.d, #24
+ // vl128 state = 0xd564dd2d
+ __ dci(0x45682260); // sqshrnb z0.s, z19.d, #24
+ // vl128 state = 0x7b901f9b
+ __ dci(0x45682642); // sqshrnt z2.s, z18.d, #24
+ // vl128 state = 0x1d4fe6f4
+ __ dci(0x45680606); // sqshrunt z6.s, z16.d, #24
+ // vl128 state = 0xe82d65a2
+ __ dci(0x45680282); // sqshrunb z2.s, z20.d, #24
+ // vl128 state = 0x8a1ae6f6
+ __ dci(0x45680283); // sqshrunb z3.s, z20.d, #24
+ // vl128 state = 0x5e345dcf
+ __ dci(0x4568238b); // sqshrnb z11.s, z28.d, #24
+ // vl128 state = 0x31f54470
+ __ dci(0x45682383); // sqshrnb z3.s, z28.d, #24
+ // vl128 state = 0x6b48975d
+ __ dci(0x45682682); // sqshrnt z2.s, z20.d, #24
+ // vl128 state = 0xa9fba153
+ __ dci(0x45782e8a); // sqrshrnt z10.s, z20.d, #8
+ // vl128 state = 0x0fe3100f
+ __ dci(0x45780eba); // sqrshrunt z26.s, z21.d, #8
+ // vl128 state = 0x1a392151
+ __ dci(0x45700e32); // sqrshrunt z18.s, z17.d, #16
+ // vl128 state = 0x08cea935
+ __ dci(0x45700e42); // sqrshrunt z2.s, z18.d, #16
+ // vl128 state = 0x353f24b1
+ __ dci(0x45782e52); // sqrshrnt z18.s, z18.d, #8
+ // vl128 state = 0xe06219d0
+ __ dci(0x45782e42); // sqrshrnt z2.s, z18.d, #8
+ // vl128 state = 0xbb4c6d3b
+ __ dci(0x45742e46); // sqrshrnt z6.s, z18.d, #12
+ // vl128 state = 0x77e7393c
+ __ dci(0x45642ec7); // sqrshrnt z7.s, z22.d, #28
+ // vl128 state = 0x5201634c
+ __ dci(0x45642a97); // sqrshrnb z23.s, z20.d, #28
+ // vl128 state = 0x49c32fc1
+ __ dci(0x45640b87); // sqrshrunb z7.s, z28.d, #28
+ // vl128 state = 0xdd09d56d
+ __ dci(0x45640f0f); // sqrshrunt z15.s, z24.d, #28
+ // vl128 state = 0x50f7d144
+ __ dci(0x45600e0e); // sqrshrunt z14.s, z16.d, #32
+ // vl128 state = 0xd6bbd38a
+ __ dci(0x45620a0f); // sqrshrunb z15.s, z16.d, #30
+ // vl128 state = 0x141e2991
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length, indexed by the number of
+ // 128-bit (Q-sized) lanes minus one (VL 128..2048 bits).
+ uint32_t expected_hashes[] = {
+ 0x141e2991,
+ 0x8cb951d0,
+ 0x74337526,
+ 0x515534c6,
+ 0xe3789189,
+ 0xfee7d505,
+ 0xfaae7ee8,
+ 0x71a110a3,
+ 0x6469dcda,
+ 0xe61425fc,
+ 0x6840f618,
+ 0xbc1b116d,
+ 0xaad97378,
+ 0x5d91b661,
+ 0x9eb84163,
+ 0xf8ca1e37,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
<parameter>
+TEST_SVE(sve2_aba_long) {
+ // Executes a fixed, generated sequence of 50 raw SVE2 absolute-difference-
+ // and-accumulate long encodings (sabalb/sabalt/uabalb/uabalt), then hashes
+ // the machine state and compares against a precomputed hash for the current
+ // vector length. `vl128 state` comments track the 128-bit-VL running hash.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x45c2ca3e); // uabalb z30.d, z17.s, z2.s
+ // vl128 state = 0xac47a81c
+ __ dci(0x45caca7f); // uabalb z31.d, z19.s, z10.s
+ // vl128 state = 0x10cd4e69
+ __ dci(0x455aca7e); // uabalb z30.h, z19.b, z26.b
+ // vl128 state = 0x8fba3755
+ __ dci(0x45daca5f); // uabalb z31.d, z18.s, z26.s
+ // vl128 state = 0x8c18257c
+ __ dci(0x45d8ca1d); // uabalb z29.d, z16.s, z24.s
+ // vl128 state = 0xe6eef5ec
+ __ dci(0x45d8ce95); // uabalt z21.d, z20.s, z24.s
+ // vl128 state = 0x2368baee
+ __ dci(0x4598ce14); // uabalt z20.s, z16.h, z24.h
+ // vl128 state = 0xc9281174
+ __ dci(0x4598ce04); // uabalt z4.s, z16.h, z24.h
+ // vl128 state = 0xa0b5fc24
+ __ dci(0x45d8ce40); // uabalt z0.d, z18.s, z24.s
+ // vl128 state = 0xb3ef6f1d
+ __ dci(0x45daca44); // uabalb z4.d, z18.s, z26.s
+ // vl128 state = 0xcfa3666b
+ __ dci(0x45dace00); // uabalt z0.d, z16.s, z26.s
+ // vl128 state = 0x27bb4ba9
+ __ dci(0x459ece04); // uabalt z4.s, z16.h, z30.h
+ // vl128 state = 0xb6628d3e
+ __ dci(0x458ece80); // uabalt z0.s, z20.h, z14.h
+ // vl128 state = 0xe8db526e
+ __ dci(0x458ec482); // sabalt z2.s, z4.h, z14.h
+ // vl128 state = 0x73cd8386
+ __ dci(0x45cec4a3); // sabalt z3.d, z5.s, z14.s
+ // vl128 state = 0xba1c4507
+ __ dci(0x45cec8a1); // uabalb z1.d, z5.s, z14.s
+ // vl128 state = 0x851cd798
+ __ dci(0x458ec0a9); // sabalb z9.s, z5.h, z14.h
+ // vl128 state = 0xc85973b8
+ __ dci(0x45c6c0ab); // sabalb z11.d, z5.s, z6.s
+ // vl128 state = 0x84072419
+ __ dci(0x4544c0a9); // sabalb z9.h, z5.b, z4.b
+ // vl128 state = 0x533a377a
+ __ dci(0x4550c0a1); // sabalb z1.h, z5.b, z16.b
+ // vl128 state = 0x5a216f3a
+ __ dci(0x4550c0b1); // sabalb z17.h, z5.b, z16.b
+ // vl128 state = 0x9957b992
+ __ dci(0x4552c095); // sabalb z21.h, z4.b, z18.b
+ // vl128 state = 0x666bd8db
+ __ dci(0x4543c094); // sabalb z20.h, z4.b, z3.b
+ // vl128 state = 0xd66d3d52
+ __ dci(0x4543c095); // sabalb z21.h, z4.b, z3.b
+ // vl128 state = 0x5d47b643
+ __ dci(0x4543c385); // sabalb z5.h, z28.b, z3.b
+ // vl128 state = 0x55fc0a65
+ __ dci(0x4543c38d); // sabalb z13.h, z28.b, z3.b
+ // vl128 state = 0xbb5ccc0f
+ __ dci(0x45c3c19d); // sabalb z29.d, z12.s, z3.s
+ // vl128 state = 0xb3dedffd
+ __ dci(0x45d3c595); // sabalt z21.d, z12.s, z19.s
+ // vl128 state = 0xd80597a1
+ __ dci(0x45d2c185); // sabalb z5.d, z12.s, z18.s
+ // vl128 state = 0x29a9fafc
+ __ dci(0x45d2c0b5); // sabalb z21.d, z5.s, z18.s
+ // vl128 state = 0x85dc16cb
+ __ dci(0x45d2c0bd); // sabalb z29.d, z5.s, z18.s
+ // vl128 state = 0xc38b621d
+ __ dci(0x45d2cab9); // uabalb z25.d, z21.s, z18.s
+ // vl128 state = 0x3801ad51
+ __ dci(0x45d0ca9b); // uabalb z27.d, z20.s, z16.s
+ // vl128 state = 0xd5cc0a31
+ __ dci(0x45d0ca39); // uabalb z25.d, z17.s, z16.s
+ // vl128 state = 0x272488a9
+ __ dci(0x45d0ca3d); // uabalb z29.d, z17.s, z16.s
+ // vl128 state = 0xea109c4b
+ __ dci(0x4550ce3c); // uabalt z28.h, z17.b, z16.b
+ // vl128 state = 0x5a9bdb39
+ __ dci(0x4559ce38); // uabalt z24.h, z17.b, z25.b
+ // vl128 state = 0xd90984c9
+ __ dci(0x455bcf39); // uabalt z25.h, z25.b, z27.b
+ // vl128 state = 0x6c0884ed
+ __ dci(0x455bceb1); // uabalt z17.h, z21.b, z27.b
+ // vl128 state = 0x2f01a6ad
+ __ dci(0x455bceb3); // uabalt z19.h, z21.b, z27.b
+ // vl128 state = 0x72a428e1
+ __ dci(0x455bceb1); // uabalt z17.h, z21.b, z27.b
+ // vl128 state = 0x27adcf54
+ __ dci(0x4559ce21); // uabalt z1.h, z17.b, z25.b
+ // vl128 state = 0xf1899dea
+ __ dci(0x45d9ce05); // uabalt z5.d, z16.s, z25.s
+ // vl128 state = 0x41e92a5c
+ __ dci(0x45dbc604); // sabalt z4.d, z16.s, z27.s
+ // vl128 state = 0x96021962
+ __ dci(0x45d3c634); // sabalt z20.d, z17.s, z19.s
+ // vl128 state = 0x4795c9e2
+ __ dci(0x45dbc235); // sabalb z21.d, z17.s, z27.s
+ // vl128 state = 0x6e2eccdb
+ __ dci(0x45dbc07d); // sabalb z29.d, z3.s, z27.s
+ // vl128 state = 0x2c2e3625
+ __ dci(0x459bc87c); // uabalb z28.s, z3.h, z27.h
+ // vl128 state = 0x618669ad
+ __ dci(0x459bc878); // uabalb z24.s, z3.h, z27.h
+ // vl128 state = 0x2d1a9a08
+ __ dci(0x4593cc79); // uabalt z25.s, z3.h, z19.h
+ // vl128 state = 0xdb6575df
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length, indexed by the number of
+ // 128-bit (Q-sized) lanes minus one (VL 128..2048 bits).
+ uint32_t expected_hashes[] = {
+ 0xdb6575df,
+ 0x691c09fc,
+ 0x6d969d30,
+ 0x83db67a7,
+ 0x8ca1109d,
+ 0x5175b8ff,
+ 0xade3cb1b,
+ 0x1c7b0422,
+ 0x1199a415,
+ 0xd1c715e8,
+ 0x2053b361,
+ 0x577c4450,
+ 0x1557204a,
+ 0xe994b21a,
+ 0xec34be56,
+ 0x1c9e0136,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
</parameter>
+
+TEST_SVE(sve2_add_sub_carry) {
+ // Executes a fixed, generated sequence of 50 raw SVE2 add/subtract-with-
+ // carry encodings (adclb/adclt/sbclb/sbclt), then hashes the machine state
+ // and compares against a precomputed hash for the current vector length.
+ // `vl128 state` comments track the 128-bit-VL running hash.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x4548d4a1); // adclt z1.d, z5.d, z8.d
+ // vl128 state = 0xde78ceb3
+ __ dci(0x4588d4a5); // sbclt z5.s, z5.s, z8.s
+ // vl128 state = 0x35dc8534
+ __ dci(0x4589d421); // sbclt z1.s, z1.s, z9.s
+ // vl128 state = 0xa72d158b
+ __ dci(0x45d9d423); // sbclt z3.d, z1.d, z25.d
+ // vl128 state = 0x197181b9
+ __ dci(0x45dfd433); // sbclt z19.d, z1.d, z31.d
+ // vl128 state = 0xaad0d32d
+ __ dci(0x4597d437); // sbclt z23.s, z1.s, z23.s
+ // vl128 state = 0xb1c42b7d
+ __ dci(0x4597d436); // sbclt z22.s, z1.s, z23.s
+ // vl128 state = 0x6c51a28c
+ __ dci(0x4587d537); // sbclt z23.s, z9.s, z7.s
+ // vl128 state = 0x525b5cf8
+ __ dci(0x4586d727); // sbclt z7.s, z25.s, z6.s
+ // vl128 state = 0x33942ff9
+ __ dci(0x45c6d625); // sbclt z5.d, z17.d, z6.d
+ // vl128 state = 0x24de09b4
+ __ dci(0x45c2d6b5); // sbclt z21.d, z21.d, z2.d
+ // vl128 state = 0xabc0063f
+ __ dci(0x4546d6b7); // adclt z23.d, z21.d, z6.d
+ // vl128 state = 0x52765e95
+ __ dci(0x45c7d6a7); // sbclt z7.d, z21.d, z7.d
+ // vl128 state = 0x7045d250
+ __ dci(0x4547d4a5); // adclt z5.d, z5.d, z7.d
+ // vl128 state = 0xb20f5c2a
+ __ dci(0x4517d4a1); // adclt z1.s, z5.s, z23.s
+ // vl128 state = 0x5c2c9c29
+ __ dci(0x4507d5a5); // adclt z5.s, z13.s, z7.s
+ // vl128 state = 0x788b25f0
+ __ dci(0x4507d5ad); // adclt z13.s, z13.s, z7.s
+ // vl128 state = 0xf27eff1e
+ __ dci(0x4507d0ac); // adclb z12.s, z5.s, z7.s
+ // vl128 state = 0xc0b629de
+ __ dci(0x450ed0ad); // adclb z13.s, z5.s, z14.s
+ // vl128 state = 0x3e15df94
+ __ dci(0x458ad0a9); // sbclb z9.s, z5.s, z10.s
+ // vl128 state = 0x68f64c82
+ __ dci(0x4582d2ad); // sbclb z13.s, z21.s, z2.s
+ // vl128 state = 0x882379e1
+ __ dci(0x4502d3af); // adclb z15.s, z29.s, z2.s
+ // vl128 state = 0x6901994e
+ __ dci(0x450ad32b); // adclb z11.s, z25.s, z10.s
+ // vl128 state = 0xa67e9382
+ __ dci(0x4582d329); // sbclb z9.s, z25.s, z2.s
+ // vl128 state = 0x9451d0c4
+ __ dci(0x4592d22b); // sbclb z11.s, z17.s, z18.s
+ // vl128 state = 0xc19da52e
+ __ dci(0x459ad2a3); // sbclb z3.s, z21.s, z26.s
+ // vl128 state = 0x91065b69
+ __ dci(0x451ad233); // adclb z19.s, z17.s, z26.s
+ // vl128 state = 0xe3fdc4a5
+ __ dci(0x450bd232); // adclb z18.s, z17.s, z11.s
+ // vl128 state = 0x168abbff
+ __ dci(0x450ad2b6); // adclb z22.s, z21.s, z10.s
+ // vl128 state = 0x64d0c940
+ __ dci(0x4582d2b4); // sbclb z20.s, z21.s, z2.s
+ // vl128 state = 0x37307824
+ __ dci(0x4582d6e4); // sbclt z4.s, z23.s, z2.s
+ // vl128 state = 0xd35e02f7
+ __ dci(0x4500d6f4); // adclt z20.s, z23.s, z0.s
+ // vl128 state = 0x017ed1b0
+ __ dci(0x4501d2e4); // adclb z4.s, z23.s, z1.s
+ // vl128 state = 0x327242bc
+ __ dci(0x4501d1f4); // adclb z20.s, z15.s, z1.s
+ // vl128 state = 0x208174e8
+ __ dci(0x4503d1b0); // adclb z16.s, z13.s, z3.s
+ // vl128 state = 0xa5a9f61d
+ __ dci(0x4501d198); // adclb z24.s, z12.s, z1.s
+ // vl128 state = 0x97e22c2b
+ __ dci(0x4501d3da); // adclb z26.s, z30.s, z1.s
+ // vl128 state = 0xd3ac35d5
+ __ dci(0x4501d6de); // adclt z30.s, z22.s, z1.s
+ // vl128 state = 0xab835df9
+ __ dci(0x4503d2dc); // adclb z28.s, z22.s, z3.s
+ // vl128 state = 0xa048599b
+ __ dci(0x4502d6d8); // adclt z24.s, z22.s, z2.s
+ // vl128 state = 0x4c245fee
+ __ dci(0x4502d6d0); // adclt z16.s, z22.s, z2.s
+ // vl128 state = 0x0222f3cc
+ __ dci(0x4502d280); // adclb z0.s, z20.s, z2.s
+ // vl128 state = 0x16bd7f6a
+ __ dci(0x458ad284); // sbclb z4.s, z20.s, z10.s
+ // vl128 state = 0x7ef7d0a2
+ __ dci(0x458ad6d4); // sbclt z20.s, z22.s, z10.s
+ // vl128 state = 0x303d8262
+ __ dci(0x458ad6dc); // sbclt z28.s, z22.s, z10.s
+ // vl128 state = 0x86b8b0e9
+ __ dci(0x458bd7cc); // sbclt z12.s, z30.s, z11.s
+ // vl128 state = 0x068cc5cd
+ __ dci(0x45dbd7ce); // sbclt z14.d, z30.d, z27.d
+ // vl128 state = 0x30acfa7f
+ __ dci(0x45dfd75e); // sbclt z30.d, z26.d, z31.d
+ // vl128 state = 0xdbd8b32a
+ __ dci(0x45ddd7ce); // sbclt z14.d, z30.d, z29.d
+ // vl128 state = 0x59c3c1a9
+ __ dci(0x45ddd7cf); // sbclt z15.d, z30.d, z29.d
+ // vl128 state = 0x5c953a50
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length, indexed by the number of
+ // 128-bit (Q-sized) lanes minus one (VL 128..2048 bits).
+ uint32_t expected_hashes[] = {
+ 0x5c953a50,
+ 0x22fea196,
+ 0x084c11a8,
+ 0x6e7e24d1,
+ 0x70965ff7,
+ 0x8c7cb797,
+ 0xdb846b66,
+ 0x512f049d,
+ 0x5c45d25c,
+ 0xa349606f,
+ 0x68a853e5,
+ 0xd92fbeff,
+ 0x52e59a6b,
+ 0xf77ee8ce,
+ 0x6c79623b,
+ 0x7efed6cc,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_add_sub_high) {
+ // Executes a fixed, generated sequence of 50 raw SVE2 add/subtract
+ // high-half narrowing encodings (addhnb/addhnt/raddhnb/raddhnt/subhnb/
+ // subhnt/rsubhnb/rsubhnt), then hashes the machine state and compares
+ // against a precomputed hash for the current vector length. `vl128 state`
+ // comments track the 128-bit-VL running hash.
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x45fd7464); // subhnt z4.s, z3.d, z29.d
+ // vl128 state = 0x0eea0f4a
+ __ dci(0x45fc7c66); // rsubhnt z6.s, z3.d, z28.d
+ // vl128 state = 0x4dc0d938
+ __ dci(0x45fc7c6e); // rsubhnt z14.s, z3.d, z28.d
+ // vl128 state = 0x33de615e
+ __ dci(0x45f46c7e); // raddhnt z30.s, z3.d, z20.d
+ // vl128 state = 0xa24af7ae
+ __ dci(0x45f06e7c); // raddhnt z28.s, z19.d, z16.d
+ // vl128 state = 0x13883aa2
+ __ dci(0x45b06a6c); // raddhnb z12.h, z19.s, z16.s
+ // vl128 state = 0x5bf75f05
+ __ dci(0x45b96a64); // raddhnb z4.h, z19.s, z25.s
+ // vl128 state = 0x0e489878
+ __ dci(0x45b96820); // raddhnb z0.h, z1.s, z25.s
+ // vl128 state = 0x86df8f5f
+ __ dci(0x45b96a01); // raddhnb z1.h, z16.s, z25.s
+ // vl128 state = 0x0d1563f2
+ __ dci(0x45b96900); // raddhnb z0.h, z8.s, z25.s
+ // vl128 state = 0xd66de87e
+ __ dci(0x45a97904); // rsubhnb z4.h, z8.s, z9.s
+ // vl128 state = 0x0c34bd33
+ __ dci(0x45a9790c); // rsubhnb z12.h, z8.s, z9.s
+ // vl128 state = 0x7892f2c5
+ __ dci(0x45e97988); // rsubhnb z8.s, z12.d, z9.d
+ // vl128 state = 0x9709efbd
+ __ dci(0x45f97909); // rsubhnb z9.s, z8.d, z25.d
+ // vl128 state = 0x029a3116
+ __ dci(0x45ff790d); // rsubhnb z13.s, z8.d, z31.d
+ // vl128 state = 0x48cf21c1
+ __ dci(0x45ff6d05); // raddhnt z5.s, z8.d, z31.d
+ // vl128 state = 0x44c94a11
+ __ dci(0x45ff6dc1); // raddhnt z1.s, z14.d, z31.d
+ // vl128 state = 0x12fab619
+ __ dci(0x45ff79d1); // rsubhnb z17.s, z14.d, z31.d
+ // vl128 state = 0x6f749933
+ __ dci(0x457f7dd0); // rsubhnt z16.b, z14.h, z31.h
+ // vl128 state = 0x404889de
+ __ dci(0x457f75f1); // subhnt z17.b, z15.h, z31.h
+ // vl128 state = 0x1dae2a16
+ __ dci(0x457f75f3); // subhnt z19.b, z15.h, z31.h
+ // vl128 state = 0xc441a9f0
+ __ dci(0x456d75fb); // subhnt z27.b, z15.h, z13.h
+ // vl128 state = 0xdd79f567
+ __ dci(0x45ed7dff); // rsubhnt z31.s, z15.d, z13.d
+ // vl128 state = 0x49b27a1f
+ __ dci(0x45e17dfe); // rsubhnt z30.s, z15.d, z1.d
+ // vl128 state = 0x19cddb35
+ __ dci(0x45e17df6); // rsubhnt z22.s, z15.d, z1.d
+ // vl128 state = 0xea722faa
+ __ dci(0x45e37d72); // rsubhnt z18.s, z11.d, z3.d
+ // vl128 state = 0x907267b3
+ __ dci(0x45737d62); // rsubhnt z2.b, z11.h, z19.h
+ // vl128 state = 0x1e5409d8
+ __ dci(0x45726d6a); // raddhnt z10.b, z11.h, z18.h
+ // vl128 state = 0xce3b87ca
+ __ dci(0x45726f5a); // raddhnt z26.b, z26.h, z18.h
+ // vl128 state = 0x2f330789
+ __ dci(0x45706f18); // raddhnt z24.b, z24.h, z16.h
+ // vl128 state = 0xff09606a
+ __ dci(0x45706f08); // raddhnt z8.b, z24.h, z16.h
+ // vl128 state = 0x062ac37b
+ __ dci(0x45706f09); // raddhnt z9.b, z24.h, z16.h
+ // vl128 state = 0xb12c9142
+ __ dci(0x45786b08); // raddhnb z8.b, z24.h, z24.h
+ // vl128 state = 0x77e41545
+ __ dci(0x45786b0c); // raddhnb z12.b, z24.h, z24.h
+ // vl128 state = 0x1f3a202d
+ __ dci(0x457a6308); // addhnb z8.b, z24.h, z26.h
+ // vl128 state = 0xea51f4b9
+ __ dci(0x45fb6318); // addhnb z24.s, z24.d, z27.d
+ // vl128 state = 0x5b98747e
+ __ dci(0x45b96319); // addhnb z25.h, z24.s, z25.s
+ // vl128 state = 0xdcebf700
+ __ dci(0x45bb621d); // addhnb z29.h, z16.s, z27.s
+ // vl128 state = 0x55a216b1
+ __ dci(0x45b3625f); // addhnb z31.h, z18.s, z19.s
+ // vl128 state = 0x3e86d641
+ __ dci(0x45b3631b); // addhnb z27.h, z24.s, z19.s
+ // vl128 state = 0x36d052e3
+ __ dci(0x45bb6213); // addhnb z19.h, z16.s, z27.s
+ // vl128 state = 0xba012cb8
+ __ dci(0x45bf7217); // subhnb z23.h, z16.s, z31.s
+ // vl128 state = 0xdef826a7
+ __ dci(0x45b67213); // subhnb z19.h, z16.s, z22.s
+ // vl128 state = 0x5cd11781
+ __ dci(0x45b66223); // addhnb z3.h, z17.s, z22.s
+ // vl128 state = 0x2f04c440
+ __ dci(0x45f66a27); // raddhnb z7.s, z17.d, z22.d
+ // vl128 state = 0x486d0d03
+ __ dci(0x45f76825); // raddhnb z5.s, z1.d, z23.d
+ // vl128 state = 0x8a94d5c9
+ __ dci(0x45f668a1); // raddhnb z1.s, z5.d, z22.d
+ // vl128 state = 0x14e8e0e7
+ __ dci(0x45f469b1); // raddhnb z17.s, z13.d, z20.d
+ // vl128 state = 0x19b96fb3
+ __ dci(0x45f469b3); // raddhnb z19.s, z13.d, z20.d
+ // vl128 state = 0xc98e7d4e
+ __ dci(0x45f169b7); // raddhnb z23.s, z13.d, z17.d
+ // vl128 state = 0x7ff24d47
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length, indexed by the number of
+ // 128-bit (Q-sized) lanes minus one (VL 128..2048 bits).
+ uint32_t expected_hashes[] = {
+ 0x7ff24d47,
+ 0xc639a9b3,
+ 0x0a1df4a5,
+ 0x30db6e18,
+ 0xf3e2f795,
+ 0x36ff477d,
+ 0x162f1ca5,
+ 0x36da990b,
+ 0x110b2c35,
+ 0xaf1580f5,
+ 0x14e39873,
+ 0x7f5eb52c,
+ 0x2ececb6f,
+ 0x4e4d71f0,
+ 0x800769d1,
+ 0x1bcbe3a3,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 complex integer addition (CADD and
+// SQCADD, rotations #90/#270). Emits a fixed sequence of raw encodings via
+// dci(), then hashes the machine state and checks it against precomputed
+// values, one per SVE vector length. The "vl128 state" comments record the
+// running hash at VL=128 after each instruction, as produced by the generator.
+TEST_SVE(sve2_complex_addition) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Exactly 50 raw instruction words are emitted below; the scope asserts
+    // that count.
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x4500dc43); // cadd z3.b, z3.b, z2.b, #270
+    // vl128 state = 0x998365c2
+    __ dci(0x4540dc13); // cadd z19.h, z19.h, z0.h, #270
+    // vl128 state = 0xcc866131
+    __ dci(0x4541d81b); // sqcadd z27.h, z27.h, z0.h, #90
+    // vl128 state = 0x2ae23a6a
+    __ dci(0x45c1d853); // sqcadd z19.d, z19.d, z2.d, #90
+    // vl128 state = 0x1f8de2d3
+    __ dci(0x4541d8c3); // sqcadd z3.h, z3.h, z6.h, #90
+    // vl128 state = 0x3655c07c
+    __ dci(0x4541d8d3); // sqcadd z19.h, z19.h, z6.h, #90
+    // vl128 state = 0x3a8fe2d9
+    __ dci(0x4541d811); // sqcadd z17.h, z17.h, z0.h, #90
+    // vl128 state = 0x003c88ea
+    __ dci(0x4540da10); // cadd z16.h, z16.h, z16.h, #90
+    // vl128 state = 0xe20c1375
+    __ dci(0x4540da18); // cadd z24.h, z24.h, z16.h, #90
+    // vl128 state = 0x67bb0270
+    __ dci(0x4540de5a); // cadd z26.h, z26.h, z18.h, #270
+    // vl128 state = 0x7abb4f8f
+    __ dci(0x4540de4a); // cadd z10.h, z10.h, z18.h, #270
+    // vl128 state = 0x42850f11
+    __ dci(0x4500decb); // cadd z11.b, z11.b, z22.b, #270
+    // vl128 state = 0xda605f59
+    __ dci(0x4500da83); // cadd z3.b, z3.b, z20.b, #90
+    // vl128 state = 0x99e63476
+    __ dci(0x4500dc8b); // cadd z11.b, z11.b, z4.b, #270
+    // vl128 state = 0xd444a939
+    __ dci(0x4500dc8f); // cadd z15.b, z15.b, z4.b, #270
+    // vl128 state = 0xde3ad968
+    __ dci(0x4500d99f); // cadd z31.b, z31.b, z12.b, #90
+    // vl128 state = 0xd7cdb177
+    __ dci(0x4540d91e); // cadd z30.h, z30.h, z8.h, #90
+    // vl128 state = 0x74575b36
+    __ dci(0x4541d81a); // sqcadd z26.h, z26.h, z0.h, #90
+    // vl128 state = 0x3d347b0b
+    __ dci(0x4501d83b); // sqcadd z27.b, z27.b, z1.b, #90
+    // vl128 state = 0x03df7859
+    __ dci(0x45c1d83f); // sqcadd z31.d, z31.d, z1.d, #90
+    // vl128 state = 0xf0cdbf68
+    __ dci(0x45c1d83e); // sqcadd z30.d, z30.d, z1.d, #90
+    // vl128 state = 0x0931dda4
+    __ dci(0x45c1d83c); // sqcadd z28.d, z28.d, z1.d, #90
+    // vl128 state = 0x460b5369
+    __ dci(0x4581da3e); // sqcadd z30.s, z30.s, z17.s, #90
+    // vl128 state = 0x71af9203
+    __ dci(0x45c1d83f); // sqcadd z31.d, z31.d, z1.d, #90
+    // vl128 state = 0xd6babc53
+    __ dci(0x4581da3e); // sqcadd z30.s, z30.s, z17.s, #90
+    // vl128 state = 0xd3e4f42f
+    __ dci(0x4501d83f); // sqcadd z31.b, z31.b, z1.b, #90
+    // vl128 state = 0x7a594239
+    __ dci(0x4501dcbb); // sqcadd z27.b, z27.b, z5.b, #270
+    // vl128 state = 0x24a5a8c9
+    __ dci(0x4501dfba); // sqcadd z26.b, z26.b, z29.b, #270
+    // vl128 state = 0x0c3df842
+    __ dci(0x4581dfea); // sqcadd z10.s, z10.s, z31.s, #270
+    // vl128 state = 0x6173c97f
+    __ dci(0x4581db7a); // sqcadd z26.s, z26.s, z27.s, #90
+    // vl128 state = 0x55090d5f
+    __ dci(0x4581db1b); // sqcadd z27.s, z27.s, z24.s, #90
+    // vl128 state = 0x63477385
+    __ dci(0x4581da93); // sqcadd z19.s, z19.s, z20.s, #90
+    // vl128 state = 0xc996545e
+    __ dci(0x45c1db92); // sqcadd z18.d, z18.d, z28.d, #90
+    // vl128 state = 0xa48bf827
+    __ dci(0x45c1db93); // sqcadd z19.d, z19.d, z28.d, #90
+    // vl128 state = 0xf5a3b641
+    __ dci(0x45c1daa3); // sqcadd z3.d, z3.d, z21.d, #90
+    // vl128 state = 0x20ad4c28
+    __ dci(0x4581dba7); // sqcadd z7.s, z7.s, z29.s, #90
+    // vl128 state = 0xc9e36e96
+    __ dci(0x45c1daaf); // sqcadd z15.d, z15.d, z21.d, #90
+    // vl128 state = 0x6eb23fd2
+    __ dci(0x45c1daae); // sqcadd z14.d, z14.d, z21.d, #90
+    // vl128 state = 0x585d4d63
+    __ dci(0x4541dae6); // sqcadd z6.h, z6.h, z23.h, #90
+    // vl128 state = 0x827cc0a8
+    __ dci(0x4541daee); // sqcadd z14.h, z14.h, z23.h, #90
+    // vl128 state = 0xe00543a0
+    __ dci(0x4501dabe); // sqcadd z30.b, z30.b, z21.b, #90
+    // vl128 state = 0x2313db47
+    __ dci(0x4501deff); // sqcadd z31.b, z31.b, z23.b, #270
+    // vl128 state = 0xe30d4e83
+    __ dci(0x4501defd); // sqcadd z29.b, z29.b, z23.b, #270
+    // vl128 state = 0xb95d6d94
+    __ dci(0x4501def5); // sqcadd z21.b, z21.b, z23.b, #270
+    // vl128 state = 0x4f18b02e
+    __ dci(0x4501def4); // sqcadd z20.b, z20.b, z23.b, #270
+    // vl128 state = 0x20ae9a78
+    __ dci(0x4501dee4); // sqcadd z4.b, z4.b, z23.b, #270
+    // vl128 state = 0x4eef87a9
+    __ dci(0x4501dee6); // sqcadd z6.b, z6.b, z23.b, #270
+    // vl128 state = 0x1b041a7b
+    __ dci(0x4501dfc2); // sqcadd z2.b, z2.b, z30.b, #270
+    // vl128 state = 0xeaf5e18f
+    __ dci(0x4500df92); // cadd z18.b, z18.b, z28.b, #270
+    // vl128 state = 0xc47ee5e7
+    __ dci(0x4500de13); // cadd z19.b, z19.b, z16.b, #270
+    // vl128 state = 0x6482d75c
+  }
+
+  // Hash the final machine state and load it into w0 for comparison.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One expected hash per vector length, indexed by the number of 128-bit
+    // lanes minus one (VL128, VL256, ... VL2048).
+    uint32_t expected_hashes[] = {
+        0x6482d75c,
+        0x48d9bd2f,
+        0xd6bd52ae,
+        0x56be94f0,
+        0x620cfb69,
+        0xb646e0fe,
+        0x6034718f,
+        0xd8187657,
+        0x211218bb,
+        0xc973a707,
+        0x6020dcc9,
+        0x8fadad0c,
+        0x0132ecbc,
+        0x3a07eb63,
+        0x5c20eb82,
+        0xc92d6cb2,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Generated regression test for the SVE2 bit-permute instructions (BDEP,
+// BEXT, BGRP), which require the optional kSVEBitPerm feature. Emits a fixed
+// encoding sequence via dci(), hashes the machine state, and checks it
+// against precomputed per-vector-length values. The "vl128 state" comments
+// record the generator's running hash at VL=128 after each instruction.
+TEST_SVE(sve2_bit_permute) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kSVEBitPerm,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Exactly 50 raw instruction words are emitted below.
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x455fbb1a); // bgrp z26.h, z24.h, z31.h
+    // vl128 state = 0x39fb8e5b
+    __ dci(0x451fbb58); // bgrp z24.b, z26.b, z31.b
+    // vl128 state = 0x7fbccdbd
+    __ dci(0x4517bb19); // bgrp z25.b, z24.b, z23.b
+    // vl128 state = 0x67caf176
+    __ dci(0x4517bb18); // bgrp z24.b, z24.b, z23.b
+    // vl128 state = 0x665fd977
+    __ dci(0x4517ba5c); // bgrp z28.b, z18.b, z23.b
+    // vl128 state = 0x0f2c1473
+    __ dci(0x4517ba38); // bgrp z24.b, z17.b, z23.b
+    // vl128 state = 0x253789a0
+    __ dci(0x4517ba3c); // bgrp z28.b, z17.b, z23.b
+    // vl128 state = 0xd3b26fd2
+    __ dci(0x4515ba6c); // bgrp z12.b, z19.b, z21.b
+    // vl128 state = 0x4bad6941
+    __ dci(0x4515bac4); // bgrp z4.b, z22.b, z21.b
+    // vl128 state = 0x7c70d2d2
+    __ dci(0x4517ba86); // bgrp z6.b, z20.b, z23.b
+    // vl128 state = 0x5794816b
+    __ dci(0x4517ba87); // bgrp z7.b, z20.b, z23.b
+    // vl128 state = 0xe67993b1
+    __ dci(0x4515b297); // bext z23.b, z20.b, z21.b
+    // vl128 state = 0x3041b7ee
+    __ dci(0x4517b396); // bext z22.b, z28.b, z23.b
+    // vl128 state = 0xb571d524
+    __ dci(0x451bb386); // bext z6.b, z28.b, z27.b
+    // vl128 state = 0x73ce1823
+    __ dci(0x4513b784); // bdep z4.b, z28.b, z19.b
+    // vl128 state = 0x4264f0f2
+    __ dci(0x4593b7ac); // bdep z12.s, z29.s, z19.s
+    // vl128 state = 0xf9cb9d26
+    __ dci(0x4593b7a8); // bdep z8.s, z29.s, z19.s
+    // vl128 state = 0xa2b310a0
+    __ dci(0x4597b780); // bdep z0.s, z28.s, z23.s
+    // vl128 state = 0xee25c82f
+    __ dci(0x4597b781); // bdep z1.s, z28.s, z23.s
+    // vl128 state = 0xdca7577f
+    __ dci(0x4597b7e3); // bdep z3.s, z31.s, z23.s
+    // vl128 state = 0x32294429
+    __ dci(0x45dfb7e1); // bdep z1.d, z31.d, z31.d
+    // vl128 state = 0xc147e511
+    __ dci(0x455db7e5); // bdep z5.h, z31.h, z29.h
+    // vl128 state = 0x7a51d422
+    __ dci(0x45d5b7e4); // bdep z4.d, z31.d, z21.d
+    // vl128 state = 0x512ad92a
+    __ dci(0x45c7b7ec); // bdep z12.d, z31.d, z7.d
+    // vl128 state = 0xe59fbf5c
+    __ dci(0x4547b7a8); // bdep z8.h, z29.h, z7.h
+    // vl128 state = 0xb85fd3b1
+    __ dci(0x454fb72c); // bdep z12.h, z25.h, z15.h
+    // vl128 state = 0xc820e9d0
+    __ dci(0x4557b724); // bdep z4.h, z25.h, z23.h
+    // vl128 state = 0x814ff3f4
+    __ dci(0x4557bb20); // bgrp z0.h, z25.h, z23.h
+    // vl128 state = 0xc58dee50
+    __ dci(0x4556b321); // bext z1.h, z25.h, z22.h
+    // vl128 state = 0xf19c0956
+    __ dci(0x4556b3e3); // bext z3.h, z31.h, z22.h
+    // vl128 state = 0x2a256808
+    __ dci(0x4546b367); // bext z7.h, z27.h, z6.h
+    // vl128 state = 0x1c6696f4
+    __ dci(0x4556bb66); // bgrp z6.h, z27.h, z22.h
+    // vl128 state = 0x32522ca2
+    __ dci(0x4556bb76); // bgrp z22.h, z27.h, z22.h
+    // vl128 state = 0x33fe6590
+    __ dci(0x45c6bb66); // bgrp z6.d, z27.d, z6.d
+    // vl128 state = 0x45d26723
+    __ dci(0x45c2b976); // bgrp z22.d, z11.d, z2.d
+    // vl128 state = 0x364d9885
+    __ dci(0x4540b974); // bgrp z20.h, z11.h, z0.h
+    // vl128 state = 0x36a0bd94
+    __ dci(0x45c0b164); // bext z4.d, z11.d, z0.d
+    // vl128 state = 0x4ee9a90c
+    __ dci(0x45ccb16c); // bext z12.d, z11.d, z12.d
+    // vl128 state = 0x30c32d69
+    __ dci(0x458cb368); // bext z8.s, z27.s, z12.s
+    // vl128 state = 0xfc2c912f
+    __ dci(0x450cb769); // bdep z9.b, z27.b, z12.b
+    // vl128 state = 0xef976b44
+    __ dci(0x458cb7eb); // bdep z11.s, z31.s, z12.s
+    // vl128 state = 0x6f9e21b8
+    __ dci(0x4588b5ef); // bdep z15.s, z15.s, z8.s
+    // vl128 state = 0xa1f212e2
+    __ dci(0x4598b5ad); // bdep z13.s, z13.s, z24.s
+    // vl128 state = 0xe4286a40
+    __ dci(0x4598b5af); // bdep z15.s, z13.s, z24.s
+    // vl128 state = 0x7d6622e5
+    __ dci(0x4598b6ad); // bdep z13.s, z21.s, z24.s
+    // vl128 state = 0xcd00829c
+    __ dci(0x4518b2af); // bext z15.b, z21.b, z24.b
+    // vl128 state = 0xa8d58b2d
+    __ dci(0x4519b2e7); // bext z7.b, z23.b, z25.b
+    // vl128 state = 0x2b7b7c44
+    __ dci(0x4518b2a6); // bext z6.b, z21.b, z24.b
+    // vl128 state = 0x09c81b7e
+    __ dci(0x4518b2a7); // bext z7.b, z21.b, z24.b
+    // vl128 state = 0xab1b2b22
+    __ dci(0x4519b6a5); // bdep z5.b, z21.b, z25.b
+    // vl128 state = 0x03476e4c
+  }
+
+  // Hash the final machine state and load it into w0 for comparison.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One expected hash per vector length, indexed by the number of 128-bit
+    // lanes minus one.
+    uint32_t expected_hashes[] = {
+        0x03476e4c,
+        0xcc54e76f,
+        0x08324d66,
+        0xcc289ee1,
+        0xacd3ba43,
+        0xe961aeda,
+        0x60a204b1,
+        0xde020904,
+        0x0652d1e5,
+        0x7982dc25,
+        0x02a2c1cb,
+        0x4dd9e71b,
+        0xb57f587f,
+        0xb75e0d62,
+        0x78330809,
+        0xbc7046ae,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Generated regression test for SVE2 widening integer multiplies
+// (SMULLB/SMULLT/UMULLB/UMULLT, vector form). Emits a fixed encoding
+// sequence via dci(), hashes the machine state, and checks it against
+// precomputed per-vector-length values. The "vl128 state" comments record
+// the generator's running hash at VL=128 after each instruction.
+TEST_SVE(sve2_smullb_smullt_umullb_umullt_vector) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Exactly 50 raw instruction words are emitted below.
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x455a7bc2); // umullb z2.h, z30.b, z26.b
+    // vl128 state = 0xe2a2b611
+    __ dci(0x454a7b92); // umullb z18.h, z28.b, z10.b
+    // vl128 state = 0x12b3b0c6
+    __ dci(0x45427bda); // umullb z26.h, z30.b, z2.b
+    // vl128 state = 0x74f4a891
+    __ dci(0x45c67bde); // umullb z30.d, z30.s, z6.s
+    // vl128 state = 0x20402d9f
+    __ dci(0x45467b56); // umullb z22.h, z26.b, z6.b
+    // vl128 state = 0x75e15413
+    __ dci(0x45427f54); // umullt z20.h, z26.b, z2.b
+    // vl128 state = 0x51478ee1
+    __ dci(0x45427fe4); // umullt z4.h, z31.b, z2.b
+    // vl128 state = 0x63381b63
+    __ dci(0x45567fe5); // umullt z5.h, z31.b, z22.b
+    // vl128 state = 0x0967f882
+    __ dci(0x45467df5); // umullt z21.h, z15.b, z6.b
+    // vl128 state = 0x753e96b9
+    __ dci(0x454279f1); // umullb z17.h, z15.b, z2.b
+    // vl128 state = 0xcff906e6
+    __ dci(0x454078f5); // umullb z21.h, z7.b, z0.b
+    // vl128 state = 0x5609bd14
+    __ dci(0x454070d4); // smullb z20.h, z6.b, z0.b
+    // vl128 state = 0xf284d300
+    __ dci(0x45407016); // smullb z22.h, z0.b, z0.b
+    // vl128 state = 0xbb549bf7
+    __ dci(0x45487086); // smullb z6.h, z4.b, z8.b
+    // vl128 state = 0x6ef99ff1
+    __ dci(0x454070c7); // smullb z7.h, z6.b, z0.b
+    // vl128 state = 0x90177a84
+    __ dci(0x45407846); // umullb z6.h, z2.b, z0.b
+    // vl128 state = 0xd3dbb2fe
+    __ dci(0x45417a56); // umullb z22.h, z18.b, z1.b
+    // vl128 state = 0x7d30cf73
+    __ dci(0x45417877); // umullb z23.h, z3.b, z1.b
+    // vl128 state = 0x0623e678
+    __ dci(0x45417807); // umullb z7.h, z0.b, z1.b
+    // vl128 state = 0xe849cf35
+    __ dci(0x454178a3); // umullb z3.h, z5.b, z1.b
+    // vl128 state = 0xcad236a9
+    __ dci(0x45437cab); // umullt z11.h, z5.b, z3.b
+    // vl128 state = 0xc8dfcb1d
+    __ dci(0x454b7c3b); // umullt z27.h, z1.b, z11.b
+    // vl128 state = 0x6136e2d6
+    __ dci(0x454b7a3a); // umullb z26.h, z17.b, z11.b
+    // vl128 state = 0x091beb5a
+    __ dci(0x454b72b2); // smullb z18.h, z21.b, z11.b
+    // vl128 state = 0x932b30ec
+    __ dci(0x454b7622); // smullt z2.h, z17.b, z11.b
+    // vl128 state = 0xee51239c
+    __ dci(0x454b76ea); // smullt z10.h, z23.b, z11.b
+    // vl128 state = 0xf4fcc577
+    __ dci(0x454b74ab); // smullt z11.h, z5.b, z11.b
+    // vl128 state = 0xcf0c8028
+    __ dci(0x454d74bb); // smullt z27.h, z5.b, z13.b
+    // vl128 state = 0x0f8523c8
+    __ dci(0x454d740b); // smullt z11.h, z0.b, z13.b
+    // vl128 state = 0xc02b2f52
+    __ dci(0x454d7403); // smullt z3.h, z0.b, z13.b
+    // vl128 state = 0x11b4180c
+    __ dci(0x45557413); // smullt z19.h, z0.b, z21.b
+    // vl128 state = 0x26eef57a
+    __ dci(0x45557531); // smullt z17.h, z9.b, z21.b
+    // vl128 state = 0x6f3fce98
+    __ dci(0x455574b9); // smullt z25.h, z5.b, z21.b
+    // vl128 state = 0x0d4ac272
+    __ dci(0x455571b1); // smullb z17.h, z13.b, z21.b
+    // vl128 state = 0x7c866a41
+    __ dci(0x455573e1); // smullb z1.h, z31.b, z21.b
+    // vl128 state = 0x9c724758
+    __ dci(0x455473c9); // smullb z9.h, z30.b, z20.b
+    // vl128 state = 0xa9a8d0aa
+    __ dci(0x455473cb); // smullb z11.h, z30.b, z20.b
+    // vl128 state = 0xd7eec117
+    __ dci(0x455473a9); // smullb z9.h, z29.b, z20.b
+    // vl128 state = 0x35caaa62
+    __ dci(0x455473a8); // smullb z8.h, z29.b, z20.b
+    // vl128 state = 0x97a1d399
+    __ dci(0x455473b8); // smullb z24.h, z29.b, z20.b
+    // vl128 state = 0x3adce4ee
+    __ dci(0x455673fa); // smullb z26.h, z31.b, z22.b
+    // vl128 state = 0xd17120ea
+    __ dci(0x455e77ea); // smullt z10.h, z31.b, z30.b
+    // vl128 state = 0x1e238a9e
+    __ dci(0x455677da); // smullt z26.h, z30.b, z22.b
+    // vl128 state = 0xfbccf6c2
+    __ dci(0x454673d8); // smullb z24.h, z30.b, z6.b
+    // vl128 state = 0xa47583be
+    __ dci(0x45c67359); // smullb z25.d, z26.s, z6.s
+    // vl128 state = 0x4e8a9b37
+    __ dci(0x45c47751); // smullt z17.d, z26.s, z4.s
+    // vl128 state = 0xe3c06571
+    __ dci(0x45d67741); // smullt z1.d, z26.s, z22.s
+    // vl128 state = 0x6629e034
+    __ dci(0x45d67b45); // umullb z5.d, z26.s, z22.s
+    // vl128 state = 0x66a99e85
+    __ dci(0x45867b47); // umullb z7.s, z26.h, z6.h
+    // vl128 state = 0xf1cc3339
+    __ dci(0x45867b45); // umullb z5.s, z26.h, z6.h
+    // vl128 state = 0x8bf658d7
+  }
+
+  // Hash the final machine state and load it into w0 for comparison.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One expected hash per vector length, indexed by the number of 128-bit
+    // lanes minus one.
+    uint32_t expected_hashes[] = {
+        0x8bf658d7,
+        0x82fac555,
+        0x07c3d434,
+        0x25d2ee2b,
+        0xe70f4394,
+        0x79223404,
+        0x368ed35f,
+        0x6565d842,
+        0xead08c30,
+        0xae35e083,
+        0xe1959b85,
+        0x94ad31e7,
+        0x9caeda4d,
+        0x7611d6dc,
+        0x22977911,
+        0xcf3754ec,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Generated regression test for SVE2 saturating doubling and polynomial
+// widening multiplies (SQDMULLB/SQDMULLT and PMULLB/PMULLT, vector form).
+// Emits a fixed encoding sequence via dci(), hashes the machine state, and
+// checks it against precomputed per-vector-length values. The "vl128 state"
+// comments record the generator's running hash at VL=128.
+TEST_SVE(sve2_sqdmullb_sqdmullt_pmullb_pmullb_vector) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Exactly 50 raw instruction words are emitted below.
+    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+    __ dci(0x45936164); // sqdmullb z4.s, z11.h, z19.h
+    // vl128 state = 0xacc89592
+    __ dci(0x459161f4); // sqdmullb z20.s, z15.h, z17.h
+    // vl128 state = 0x142c66e5
+    __ dci(0x459563f5); // sqdmullb z21.s, z31.h, z21.h
+    // vl128 state = 0x5cfcb839
+    __ dci(0x45956265); // sqdmullb z5.s, z19.h, z21.h
+    // vl128 state = 0x33616223
+    __ dci(0x45d56235); // sqdmullb z21.d, z17.s, z21.s
+    // vl128 state = 0x987a4a0d
+    __ dci(0x45556031); // sqdmullb z17.h, z1.b, z21.b
+    // vl128 state = 0xf7dd9b01
+    __ dci(0x45506035); // sqdmullb z21.h, z1.b, z16.b
+    // vl128 state = 0x6fa54cf3
+    __ dci(0x45506334); // sqdmullb z20.h, z25.b, z16.b
+    // vl128 state = 0x04398c6e
+    __ dci(0x45486336); // sqdmullb z22.h, z25.b, z8.b
+    // vl128 state = 0x4cda753c
+    __ dci(0x45486334); // sqdmullb z20.h, z25.b, z8.b
+    // vl128 state = 0x53993d4a
+    __ dci(0x45496b35); // pmullb z21.h, z25.b, z9.b
+    // vl128 state = 0xa591f97c
+    __ dci(0x45496b37); // pmullb z23.h, z25.b, z9.b
+    // vl128 state = 0x5cb91e99
+    __ dci(0x45496fb3); // pmullt z19.h, z29.b, z9.b
+    // vl128 state = 0x5031ac4d
+    __ dci(0x45596f3b); // pmullt z27.h, z25.b, z25.b
+    // vl128 state = 0xb0a76e75
+    __ dci(0x455d6f13); // pmullt z19.h, z24.b, z29.b
+    // vl128 state = 0xe84ca196
+    __ dci(0x455d6fb2); // pmullt z18.h, z29.b, z29.b
+    // vl128 state = 0xd294ce54
+    __ dci(0x455c6bb0); // pmullb z16.h, z29.b, z28.b
+    // vl128 state = 0x90f01471
+    __ dci(0x45546bf8); // pmullb z24.h, z31.b, z20.b
+    // vl128 state = 0xd15f23fa
+    __ dci(0x45546bf9); // pmullb z25.h, z31.b, z20.b
+    // vl128 state = 0x62ca83ea
+    __ dci(0x45546bfb); // pmullb z27.h, z31.b, z20.b
+    // vl128 state = 0xf786c1e4
+    __ dci(0x454469eb); // pmullb z11.h, z15.b, z4.b
+    // vl128 state = 0x3cc8c789
+    __ dci(0x455069fb); // pmullb z27.h, z15.b, z16.b
+    // vl128 state = 0xb14709ca
+    __ dci(0x45546dfa); // pmullt z26.h, z15.b, z20.b
+    // vl128 state = 0x38257820
+    __ dci(0x45546df8); // pmullt z24.h, z15.b, z20.b
+    // vl128 state = 0x9cc5cd3a
+    __ dci(0x45576dfc); // pmullt z28.h, z15.b, z23.b
+    // vl128 state = 0x704543ec
+    __ dci(0x45d76d6c); // pmullt z12.d, z11.s, z23.s
+    // vl128 state = 0x15ec8e77
+    __ dci(0x455f6d68); // pmullt z8.h, z11.b, z31.b
+    // vl128 state = 0xfa379a67
+    __ dci(0x45596d6a); // pmullt z10.h, z11.b, z25.b
+    // vl128 state = 0x27fcfa49
+    __ dci(0x45596d7a); // pmullt z26.h, z11.b, z25.b
+    // vl128 state = 0x13883ef0
+    __ dci(0x45596532); // sqdmullt z18.h, z9.b, z25.b
+    // vl128 state = 0x667f8699
+    __ dci(0x45596536); // sqdmullt z22.h, z9.b, z25.b
+    // vl128 state = 0x477ded37
+    __ dci(0x45d16537); // sqdmullt z23.d, z9.s, z17.s
+    // vl128 state = 0x3323eb48
+    __ dci(0x45c16515); // sqdmullt z21.d, z8.s, z1.s
+    // vl128 state = 0x3f581e83
+    __ dci(0x45456517); // sqdmullt z23.h, z8.b, z5.b
+    // vl128 state = 0xd844e48b
+    __ dci(0x45556555); // sqdmullt z21.h, z10.b, z21.b
+    // vl128 state = 0x95e6094e
+    __ dci(0x45c56554); // sqdmullt z20.d, z10.s, z5.s
+    // vl128 state = 0x198a6f75
+    __ dci(0x45cd6456); // sqdmullt z22.d, z2.s, z13.s
+    // vl128 state = 0x4d6b7178
+    __ dci(0x45c96406); // sqdmullt z6.d, z0.s, z9.s
+    // vl128 state = 0xd989cd0f
+    __ dci(0x45d96482); // sqdmullt z2.d, z4.s, z25.s
+    // vl128 state = 0xa80fdf92
+    __ dci(0x45dd6406); // sqdmullt z6.d, z0.s, z29.s
+    // vl128 state = 0x9876a20d
+    __ dci(0x45596404); // sqdmullt z4.h, z0.b, z25.b
+    // vl128 state = 0x5ad5787c
+    __ dci(0x454b6414); // sqdmullt z20.h, z0.b, z11.b
+    // vl128 state = 0x86c077d7
+    __ dci(0x454a601c); // sqdmullb z28.h, z0.b, z10.b
+    // vl128 state = 0xfe867841
+    __ dci(0x4542641d); // sqdmullt z29.h, z0.b, z2.b
+    // vl128 state = 0x7bf363f1
+    __ dci(0x4552643c); // sqdmullt z28.h, z1.b, z18.b
+    // vl128 state = 0x7cf26ed3
+    __ dci(0x4552673d); // sqdmullt z29.h, z25.b, z18.b
+    // vl128 state = 0x748f1a99
+    __ dci(0x45d6673f); // sqdmullt z31.d, z25.s, z22.s
+    // vl128 state = 0xbb15fd07
+    __ dci(0x45d2633d); // sqdmullb z29.d, z25.s, z18.s
+    // vl128 state = 0x28e0985a
+    __ dci(0x455a6339); // sqdmullb z25.h, z25.b, z26.b
+    // vl128 state = 0x9c0da0fd
+    __ dci(0x45526738); // sqdmullt z24.h, z25.b, z18.b
+    // vl128 state = 0xa970ebb8
+  }
+
+  // Hash the final machine state and load it into w0 for comparison.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One expected hash per vector length, indexed by the number of 128-bit
+    // lanes minus one.
+    uint32_t expected_hashes[] = {
+        0xa970ebb8,
+        0xc665eff5,
+        0x8cc21595,
+        0x0ea984f6,
+        0x1dbce326,
+        0x0845e911,
+        0xa6fb6cf4,
+        0x8544239a,
+        0x2412d23d,
+        0xbce6f5e0,
+        0x780ff264,
+        0xcf6cf172,
+        0xef93a3b4,
+        0x94080541,
+        0xa0aedeba,
+        0x8e8bddaa,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Generated regression test for SVE2 SQDMULLB/SQDMULLT with an indexed
+// vector element operand (z, zzi form, e.g. z10.s[#1]). Emits a fixed
+// encoding sequence via dci(), hashes the machine state, and checks it
+// against precomputed per-vector-length values. The "vl128 state" comments
+// record the generator's running hash at VL=128.
+TEST_SVE(sve2_sqdmullt_sqdmullb_z_zzi) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Exactly 30 raw instruction words are emitted below.
+    ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+    __ dci(0x44eae5a9); // sqdmullt z9.d, z13.s, z10.s[#0]
+    // vl128 state = 0x311dfe35
+    __ dci(0x44eae9a1); // sqdmullb z1.d, z13.s, z10.s[#1]
+    // vl128 state = 0x559243c3
+    __ dci(0x44eae9a5); // sqdmullb z5.d, z13.s, z10.s[#1]
+    // vl128 state = 0x44d6824c
+    __ dci(0x44e2edad); // sqdmullt z13.d, z13.s, z2.s[#1]
+    // vl128 state = 0xb5539592
+    __ dci(0x44e6e9ac); // sqdmullb z12.d, z13.s, z6.s[#1]
+    // vl128 state = 0x5e66b9f8
+    __ dci(0x44e4ebae); // sqdmullb z14.d, z29.s, z4.s[#1]
+    // vl128 state = 0x4347620a
+    __ dci(0x44e4ebaf); // sqdmullb z15.d, z29.s, z4.s[#1]
+    // vl128 state = 0xe7cfe898
+    __ dci(0x44a5ebad); // sqdmullb z13.s, z29.h, z5.h[#1]
+    // vl128 state = 0x0ca455c7
+    __ dci(0x44a5e9fd); // sqdmullb z29.s, z15.h, z5.h[#1]
+    // vl128 state = 0xcac072a9
+    __ dci(0x44e5e8fc); // sqdmullb z28.d, z7.s, z5.s[#1]
+    // vl128 state = 0xe18e8c66
+    __ dci(0x44ede9ec); // sqdmullb z12.d, z15.s, z13.s[#1]
+    // vl128 state = 0x32f642cb
+    __ dci(0x44ede9fc); // sqdmullb z28.d, z15.s, z13.s[#1]
+    // vl128 state = 0xa0467c8a
+    __ dci(0x44fce9f4); // sqdmullb z20.d, z15.s, z12.s[#3]
+    // vl128 state = 0x7ada4130
+    __ dci(0x44e4e9f6); // sqdmullb z22.d, z15.s, z4.s[#1]
+    // vl128 state = 0xc87deb44
+    __ dci(0x44f4e9d2); // sqdmullb z18.d, z14.s, z4.s[#3]
+    // vl128 state = 0x6dc052ca
+    __ dci(0x44f5e9e2); // sqdmullb z2.d, z15.s, z5.s[#3]
+    // vl128 state = 0xe05110d4
+    __ dci(0x44f5ebb2); // sqdmullb z18.d, z29.s, z5.s[#3]
+    // vl128 state = 0x7ed21594
+    __ dci(0x44b5efba); // sqdmullt z26.s, z29.h, z5.h[#5]
+    // vl128 state = 0x7d5dad40
+    __ dci(0x44b5ef78); // sqdmullt z24.s, z27.h, z5.h[#5]
+    // vl128 state = 0x418f84bc
+    __ dci(0x44f5eb70); // sqdmullb z16.d, z27.s, z5.s[#3]
+    // vl128 state = 0x72d78d32
+    __ dci(0x44e5ebf4); // sqdmullb z20.d, z31.s, z5.s[#1]
+    // vl128 state = 0x391fad35
+    __ dci(0x44e5efbc); // sqdmullt z28.d, z29.s, z5.s[#1]
+    // vl128 state = 0xb2143633
+    __ dci(0x44e1ebbd); // sqdmullb z29.d, z29.s, z1.s[#1]
+    // vl128 state = 0x468dac6e
+    __ dci(0x44f1ebed); // sqdmullb z13.d, z31.s, z1.s[#3]
+    // vl128 state = 0x9ab292bd
+    __ dci(0x44f5efe5); // sqdmullt z5.d, z31.s, z5.s[#3]
+    // vl128 state = 0x4f2bd5d1
+    __ dci(0x44fdeee7); // sqdmullt z7.d, z23.s, z13.s[#3]
+    // vl128 state = 0x7a810779
+    __ dci(0x44fdee25); // sqdmullt z5.d, z17.s, z13.s[#3]
+    // vl128 state = 0x05d23734
+    __ dci(0x44f5ea27); // sqdmullb z7.d, z17.s, z5.s[#3]
+    // vl128 state = 0x878580f5
+    __ dci(0x44f1e225); // sqdmullb z5.d, z17.s, z1.s[#2]
+    // vl128 state = 0x5fa56f94
+    __ dci(0x44e1ea21); // sqdmullb z1.d, z17.s, z1.s[#1]
+    // vl128 state = 0x05f1cdf0
+  }
+
+  // Hash the final machine state and load it into w0 for comparison.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One expected hash per vector length, indexed by the number of 128-bit
+    // lanes minus one.
+    uint32_t expected_hashes[] = {
+        0x05f1cdf0,
+        0x6b88d4f2,
+        0x83bf279d,
+        0x12f21868,
+        0x6c68a5ce,
+        0x5710343f,
+        0xa4d0d0ee,
+        0x335b20c5,
+        0x0dd491c5,
+        0x98966292,
+        0xb68cdacd,
+        0xa26f9914,
+        0x6dd60ced,
+        0x5cd0d62c,
+        0xebe3fb25,
+        0xb264d998,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+// Generated regression test for the SVE2 XAR (exclusive-OR and rotate right
+// by immediate) instruction. Emits a fixed encoding sequence via dci(),
+// hashes the machine state, and checks it against precomputed
+// per-vector-length values. The "vl128 state" comments record the
+// generator's running hash at VL=128.
+TEST_SVE(sve2_xar) {
+  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+                          CPUFeatures::kSVE2,
+                          CPUFeatures::kNEON,
+                          CPUFeatures::kCRC32);
+  START();
+
+  SetInitialMachineState(&masm);
+  // state = 0xe2bd2480
+
+  {
+    // Exactly 20 raw instruction words are emitted below.
+    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+    __ dci(0x04293719); // xar z25.b, z25.b, z24.b, #7
+    // vl128 state = 0x596046c4
+    __ dci(0x04293531); // xar z17.b, z17.b, z9.b, #7
+    // vl128 state = 0x38332d55
+    __ dci(0x04e93533); // xar z19.d, z19.d, z9.d, #23
+    // vl128 state = 0x535c8af7
+    __ dci(0x046b3523); // xar z3.s, z3.s, z9.s, #21
+    // vl128 state = 0x879a489f
+    __ dci(0x04eb3427); // xar z7.d, z7.d, z1.d, #21
+    // vl128 state = 0xfbac317f
+    __ dci(0x04ea3463); // xar z3.d, z3.d, z3.d, #22
+    // vl128 state = 0xfb44482e
+    __ dci(0x04fa3447); // xar z7.d, z7.d, z2.d, #6
+    // vl128 state = 0xa59e324c
+    __ dci(0x04f8346f); // xar z15.d, z15.d, z3.d, #8
+    // vl128 state = 0x7f064300
+    __ dci(0x0479346b); // xar z11.s, z11.s, z3.s, #7
+    // vl128 state = 0x0c0d3573
+    __ dci(0x0461346a); // xar z10.s, z10.s, z3.s, #31
+    // vl128 state = 0x3c61530d
+    __ dci(0x0464346b); // xar z11.s, z11.s, z3.s, #28
+    // vl128 state = 0x137c1433
+    __ dci(0x04643469); // xar z9.s, z9.s, z3.s, #28
+    // vl128 state = 0x81d55bb1
+    __ dci(0x0464346b); // xar z11.s, z11.s, z3.s, #28
+    // vl128 state = 0xad2ac5c0
+    __ dci(0x0434346a); // xar z10.h, z10.h, z3.h, #12
+    // vl128 state = 0x2997a1d9
+    __ dci(0x04b434fa); // xar z26.d, z26.d, z7.d, #44
+    // vl128 state = 0x715f758d
+    __ dci(0x04e434f2); // xar z18.d, z18.d, z7.d, #28
+    // vl128 state = 0x8bfa19ef
+    __ dci(0x04ec34b3); // xar z19.d, z19.d, z5.d, #20
+    // vl128 state = 0xa8d646a5
+    __ dci(0x04ae34b7); // xar z23.d, z23.d, z5.d, #50
+    // vl128 state = 0xf590c489
+    __ dci(0x04ae34a7); // xar z7.d, z7.d, z5.d, #50
+    // vl128 state = 0xd6aafb5e
+    __ dci(0x04ae3417); // xar z23.d, z23.d, z0.d, #50
+    // vl128 state = 0xd40a8d1a
+  }
+
+  // Hash the final machine state and load it into w0 for comparison.
+  uint32_t state;
+  ComputeMachineStateHash(&masm, &state);
+  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+  __ Ldr(w0, MemOperand(x0));
+
+  END();
+  if (CAN_RUN()) {
+    RUN();
+    // One expected hash per vector length, indexed by the number of 128-bit
+    // lanes minus one.
+    uint32_t expected_hashes[] = {
+        0xd40a8d1a,
+        0x834982b0,
+        0x6fd8c07b,
+        0x2654e6f3,
+        0x79fa44fb,
+        0xc8a60223,
+        0xd12f35f0,
+        0x1e0a3315,
+        0x6970dcd2,
+        0x62305aed,
+        0xb9846a55,
+        0x1147e436,
+        0x97a8ceaa,
+        0xe8f80c0e,
+        0xea3ab3e7,
+        0xb2abd654,
+    };
+    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+  }
+}
+
+TEST_SVE(sve2_histcnt) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
+ __ dci(0x45e8c2f9); // histcnt z25.d, p0/z, z23.d, z8.d
+ // vl128 state = 0x892c6962
+ __ dci(0x45e8c1f1); // histcnt z17.d, p0/z, z15.d, z8.d
+ // vl128 state = 0x6ef7d729
+ __ dci(0x45e8c3a1); // histcnt z1.d, p0/z, z29.d, z8.d
+ // vl128 state = 0x17654f81
+ __ dci(0x45e8c3a9); // histcnt z9.d, p0/z, z29.d, z8.d
+ // vl128 state = 0xe1a0067e
+ __ dci(0x45e8c0a8); // histcnt z8.d, p0/z, z5.d, z8.d
+ // vl128 state = 0xd41f511b
+ __ dci(0x45e8d0f8); // histcnt z24.d, p4/z, z7.d, z8.d
+ // vl128 state = 0x8b73945a
+ __ dci(0x45e8d0fa); // histcnt z26.d, p4/z, z7.d, z8.d
+ // vl128 state = 0xc175acec
+ __ dci(0x45aad0fb); // histcnt z27.s, p4/z, z7.s, z10.s
+ // vl128 state = 0x44f8385b
+ __ dci(0x45aad2df); // histcnt z31.s, p4/z, z22.s, z10.s
+ // vl128 state = 0x52cd5d17
+ __ dci(0x45aad2dd); // histcnt z29.s, p4/z, z22.s, z10.s
+ // vl128 state = 0x9f8d9611
+ __ dci(0x45abd2f5); // histcnt z21.s, p4/z, z23.s, z11.s
+ // vl128 state = 0x5cc45fb0
+ __ dci(0x45aad0f7); // histcnt z23.s, p4/z, z7.s, z10.s
+ // vl128 state = 0x5096a07f
+ __ dci(0x45aad1b3); // histcnt z19.s, p4/z, z13.s, z10.s
+ // vl128 state = 0xf25781a6
+ __ dci(0x45a8d1f2); // histcnt z18.s, p4/z, z15.s, z8.s
+ // vl128 state = 0xc7025934
+ __ dci(0x45a0d0f6); // histcnt z22.s, p4/z, z7.s, z0.s
+ // vl128 state = 0xcda9c72a
+ __ dci(0x45a0d87e); // histcnt z30.s, p6/z, z3.s, z0.s
+ // vl128 state = 0x75f6bbcc
+ __ dci(0x45a0dc4e); // histcnt z14.s, p7/z, z2.s, z0.s
+ // vl128 state = 0x5e4e9fe0
+ __ dci(0x45a0dc4a); // histcnt z10.s, p7/z, z2.s, z0.s
+ // vl128 state = 0x0ec8d2b8
+ __ dci(0x45b0cc4b); // histcnt z11.s, p3/z, z2.s, z16.s
+ // vl128 state = 0x1228c442
+ __ dci(0x45b0cc43); // histcnt z3.s, p3/z, z2.s, z16.s
+ // vl128 state = 0xc6067f7b
+ __ dci(0x45b8cc73); // histcnt z19.s, p3/z, z3.s, z24.s
+ // vl128 state = 0xf04f9753
+ __ dci(0x45b8d877); // histcnt z23.s, p6/z, z3.s, z24.s
+ // vl128 state = 0xdeb83b41
+ __ dci(0x45b8d47f); // histcnt z31.s, p5/z, z3.s, z24.s
+ // vl128 state = 0x8ab3905f
+ __ dci(0x45b8d46f); // histcnt z15.s, p5/z, z3.s, z24.s
+ // vl128 state = 0x762bf277
+ __ dci(0x45b8d16d); // histcnt z13.s, p4/z, z11.s, z24.s
+ // vl128 state = 0x9a670783
+ __ dci(0x45bcd125); // histcnt z5.s, p4/z, z9.s, z28.s
+ // vl128 state = 0x3e399489
+ __ dci(0x45b8d021); // histcnt z1.s, p4/z, z1.s, z24.s
+ // vl128 state = 0x7fc8f1e7
+ __ dci(0x45f8d220); // histcnt z0.d, p4/z, z17.d, z24.d
+ // vl128 state = 0x9cb004db
+ __ dci(0x45f0d621); // histcnt z1.d, p5/z, z17.d, z16.d
+ // vl128 state = 0xdd4161b5
+ __ dci(0x45a0d625); // histcnt z5.s, p5/z, z17.s, z0.s
+ // vl128 state = 0xb5cb70bb
+ __ dci(0x45a0d4a1); // histcnt z1.s, p5/z, z5.s, z0.s
+ // vl128 state = 0x4452182b
+ __ dci(0x45a0d4a3); // histcnt z3.s, p5/z, z5.s, z0.s
+ // vl128 state = 0x71298d3c
+ __ dci(0x45a0d4a2); // histcnt z2.s, p5/z, z5.s, z0.s
+ // vl128 state = 0xa22914e1
+ __ dci(0x45a2d6a3); // histcnt z3.s, p5/z, z21.s, z2.s
+ // vl128 state = 0x6183bfbc
+ __ dci(0x45a2de21); // histcnt z1.s, p7/z, z17.s, z2.s
+ // vl128 state = 0xd1ebb242
+ __ dci(0x45e2dc20); // histcnt z0.d, p7/z, z1.d, z2.d
+ // vl128 state = 0x297a432d
+ __ dci(0x45e2d8b0); // histcnt z16.d, p6/z, z5.d, z2.d
+ // vl128 state = 0x1d2557c0
+ __ dci(0x45eed8b8); // histcnt z24.d, p6/z, z5.d, z14.d
+ // vl128 state = 0xe6ef07fa
+ __ dci(0x45eed8a8); // histcnt z8.d, p6/z, z5.d, z14.d
+ // vl128 state = 0xaf3665bb
+ __ dci(0x45aed88c); // histcnt z12.s, p6/z, z4.s, z14.s
+ // vl128 state = 0x5c2b38bc
+ __ dci(0x45efd88d); // histcnt z13.d, p6/z, z4.d, z15.d
+ // vl128 state = 0x8d5527d8
+ __ dci(0x45ffc88f); // histcnt z15.d, p2/z, z4.d, z31.d
+ // vl128 state = 0x1d2e08d2
+ __ dci(0x45fbc98d); // histcnt z13.d, p2/z, z12.d, z27.d
+ // vl128 state = 0x007388b0
+ __ dci(0x45bbcd8f); // histcnt z15.s, p3/z, z12.s, z27.s
+ // vl128 state = 0x9008a7ba
+ __ dci(0x45b3cc9f); // histcnt z31.s, p3/z, z4.s, z19.s
+ // vl128 state = 0xc4030ca4
+ __ dci(0x45bbc497); // histcnt z23.s, p1/z, z4.s, z27.s
+ // vl128 state = 0xeaf4a0b6
+ __ dci(0x45fbc415); // histcnt z21.d, p1/z, z0.d, z27.d
+ // vl128 state = 0x03d85428
+ __ dci(0x45ffc517); // histcnt z23.d, p1/z, z8.d, z31.d
+ // vl128 state = 0xa836a751
+ __ dci(0x45fbc596); // histcnt z22.d, p1/z, z12.d, z27.d
+ // vl128 state = 0x77e33f69
+ __ dci(0x45fbc4c6); // histcnt z6.d, p1/z, z6.d, z27.d
+ // vl128 state = 0xf47bb379
+ __ dci(0x45fbc4ce); // histcnt z14.d, p1/z, z6.d, z27.d
+ // vl128 state = 0x6dbfff33
+ __ dci(0x45fad4ca); // histcnt z10.d, p5/z, z6.d, z26.d
+ // vl128 state = 0xbc04915a
+ __ dci(0x45ead45a); // histcnt z26.d, p5/z, z2.d, z10.d
+ // vl128 state = 0x8969b1c5
+ __ dci(0x45aad4ca); // histcnt z10.s, p5/z, z6.s, z10.s
+ // vl128 state = 0x58d2dfac
+ __ dci(0x45aed0ce); // histcnt z14.s, p4/z, z6.s, z14.s
+ // vl128 state = 0xfa793cc7
+ __ dci(0x45aec4c6); // histcnt z6.s, p1/z, z6.s, z14.s
+ // vl128 state = 0xff4c99d8
+ __ dci(0x45abc4c7); // histcnt z7.s, p1/z, z6.s, z11.s
+ // vl128 state = 0x2b44a4ae
+ __ dci(0x45abc4cf); // histcnt z15.s, p1/z, z6.s, z11.s
+ // vl128 state = 0xbb3f8ba4
+ __ dci(0x45a9c44e); // histcnt z14.s, p1/z, z2.s, z9.s
+ // vl128 state = 0x5a3a40a6
+ __ dci(0x45b9c46f); // histcnt z15.s, p1/z, z3.s, z25.s
+ // vl128 state = 0x72e31c5f
+ __ dci(0x45b9c46e); // histcnt z14.s, p1/z, z3.s, z25.s
+ // vl128 state = 0xde56263e
+ __ dci(0x45b1c67e); // histcnt z30.s, p1/z, z19.s, z17.s
+ // vl128 state = 0xc570f0b9
+ __ dci(0x45b5c63a); // histcnt z26.s, p1/z, z17.s, z21.s
+ // vl128 state = 0x72ab1716
+ __ dci(0x45a5c72a); // histcnt z10.s, p1/z, z25.s, z5.s
+ // vl128 state = 0xe8848b2d
+ __ dci(0x45a1c77a); // histcnt z26.s, p1/z, z27.s, z1.s
+ // vl128 state = 0x2975ac38
+ __ dci(0x45a1c77b); // histcnt z27.s, p1/z, z27.s, z1.s
+ // vl128 state = 0xb0638363
+ __ dci(0x45a1c773); // histcnt z19.s, p1/z, z27.s, z1.s
+ // vl128 state = 0xc9620a45
+ __ dci(0x45e9c777); // histcnt z23.d, p1/z, z27.d, z9.d
+ // vl128 state = 0x0414c679
+ __ dci(0x45ebc67f); // histcnt z31.d, p1/z, z19.d, z11.d
+ // vl128 state = 0xc1d4410e
+ __ dci(0x45ebc37b); // histcnt z27.d, p0/z, z27.d, z11.d
+ // vl128 state = 0x3ae32e36
+ __ dci(0x45abd373); // histcnt z19.s, p4/z, z27.s, z11.s
+ // vl128 state = 0x75ffe12c
+ __ dci(0x45fbd363); // histcnt z3.d, p4/z, z27.d, z27.d
+ // vl128 state = 0x4084743b
+ __ dci(0x45ffc36b); // histcnt z11.d, p0/z, z27.d, z31.d
+ // vl128 state = 0xfade136b
+ __ dci(0x45ffc3ca); // histcnt z10.d, p0/z, z30.d, z31.d
+ // vl128 state = 0x60f18f50
+ __ dci(0x45efc2ce); // histcnt z14.d, p0/z, z22.d, z15.d
+ // vl128 state = 0x162ed112
+ __ dci(0x45adc2c6); // histcnt z6.s, p0/z, z22.s, z13.s
+ // vl128 state = 0x4f84cb96
+ __ dci(0x45adc2c4); // histcnt z4.s, p0/z, z22.s, z13.s
+ // vl128 state = 0x5d04ccb6
+ __ dci(0x45a7c2d4); // histcnt z20.s, p0/z, z22.s, z7.s
+ // vl128 state = 0x38efdab7
+ __ dci(0x45a6c0c4); // histcnt z4.s, p0/z, z6.s, z6.s
+ // vl128 state = 0xff7a0a24
+ __ dci(0x45a7c2c0); // histcnt z0.s, p0/z, z22.s, z7.s
+ // vl128 state = 0x5f7b0a31
+ __ dci(0x45a7d6c1); // histcnt z1.s, p5/z, z22.s, z7.s
+ // vl128 state = 0x1e8a6f5f
+ __ dci(0x45afd7c5); // histcnt z5.s, p5/z, z30.s, z15.s
+ // vl128 state = 0x655ed237
+ __ dci(0x45add3d5); // histcnt z21.s, p4/z, z30.s, z13.s
+ // vl128 state = 0x8c7226a9
+ __ dci(0x45add3d4); // histcnt z20.s, p4/z, z30.s, z13.s
+ // vl128 state = 0x727304ad
+ __ dci(0x45bcd3dc); // histcnt z28.s, p4/z, z30.s, z28.s
+ // vl128 state = 0xce4e49d0
+ __ dci(0x45bcd3cc); // histcnt z12.s, p4/z, z30.s, z28.s
+ // vl128 state = 0x5c252d7d
+ __ dci(0x45bcd15c); // histcnt z28.s, p4/z, z10.s, z28.s
+ // vl128 state = 0x5e1163f7
+ __ dci(0x45b5d154); // histcnt z20.s, p4/z, z10.s, z21.s
+ // vl128 state = 0xf77c50ee
+ __ dci(0x45b5d156); // histcnt z22.s, p4/z, z10.s, z21.s
+ // vl128 state = 0xe35c8438
+ __ dci(0x45b3d157); // histcnt z23.s, p4/z, z10.s, z19.s
+ // vl128 state = 0xf6926673
+ __ dci(0x45b3d156); // histcnt z22.s, p4/z, z10.s, z19.s
+ // vl128 state = 0xf9022ad2
+ __ dci(0x45b3c554); // histcnt z20.s, p1/z, z10.s, z19.s
+ // vl128 state = 0xb90dfe28
+ __ dci(0x45bbd55c); // histcnt z28.s, p5/z, z10.s, z27.s
+ // vl128 state = 0x9a939b84
+ __ dci(0x45abd57e); // histcnt z30.s, p5/z, z11.s, z11.s
+ // vl128 state = 0xd9ad8be7
+ __ dci(0x45abcd7a); // histcnt z26.s, p3/z, z11.s, z11.s
+ // vl128 state = 0x14869e4f
+ __ dci(0x45bbc57b); // histcnt z27.s, p1/z, z11.s, z27.s
+ // vl128 state = 0x25130793
+ __ dci(0x45bfcd73); // histcnt z19.s, p3/z, z11.s, z31.s
+ // vl128 state = 0x53adf455
+ __ dci(0x45bfc863); // histcnt z3.s, p2/z, z3.s, z31.s
+ // vl128 state = 0x82fa6c44
+ __ dci(0x45b7cc62); // histcnt z2.s, p3/z, z3.s, z23.s
+ // vl128 state = 0xfaefda71
+ __ dci(0x45b6cce3); // histcnt z3.s, p3/z, z7.s, z22.s
+ // vl128 state = 0xdd697c2a
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0xdd697c2a,
+ 0x1415ff61,
+ 0xb9e154c8,
+ 0x566a2af5,
+ 0xef7574b4,
+ 0x6da83471,
+ 0x356d5c4d,
+ 0x798a2403,
+ 0x2c16e862,
+ 0x6fa84021,
+ 0x6e09e8ff,
+ 0xc13a0eb6,
+ 0x88c92928,
+ 0xe51672fe,
+ 0x229b8ed5,
+ 0x9e662757,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_histseg) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 100 * kInstructionSize);
+ __ dci(0x4524a228); // histseg z8.b, z17.b, z4.b
+ // vl128 state = 0x21ed28a1
+ __ dci(0x452ca20c); // histseg z12.b, z16.b, z12.b
+ // vl128 state = 0xc135d593
+ __ dci(0x453ca288); // histseg z8.b, z20.b, z28.b
+ // vl128 state = 0xb86cd6e7
+ __ dci(0x4538a380); // histseg z0.b, z28.b, z24.b
+ // vl128 state = 0xd28ddd71
+ __ dci(0x452aa388); // histseg z8.b, z28.b, z10.b
+ // vl128 state = 0x322d3aa8
+ __ dci(0x452aa38c); // histseg z12.b, z28.b, z10.b
+ // vl128 state = 0x67d668fc
+ __ dci(0x4532a384); // histseg z4.b, z28.b, z18.b
+ // vl128 state = 0xc57505d4
+ __ dci(0x4537a380); // histseg z0.b, z28.b, z23.b
+ // vl128 state = 0xb47d0a11
+ __ dci(0x4535a3a8); // histseg z8.b, z29.b, z21.b
+ // vl128 state = 0x347adf6f
+ __ dci(0x4535a3ac); // histseg z12.b, z29.b, z21.b
+ // vl128 state = 0xb763510c
+ __ dci(0x4535a3ae); // histseg z14.b, z29.b, z21.b
+ // vl128 state = 0xb28319d5
+ __ dci(0x4525a39e); // histseg z30.b, z28.b, z5.b
+ // vl128 state = 0x0adc6533
+ __ dci(0x4525a38e); // histseg z14.b, z28.b, z5.b
+ // vl128 state = 0x248409c6
+ __ dci(0x452da3c6); // histseg z6.b, z30.b, z13.b
+ // vl128 state = 0xa71c85d6
+ __ dci(0x452da187); // histseg z7.b, z12.b, z13.b
+ // vl128 state = 0x7314b8a0
+ __ dci(0x4525a1a6); // histseg z6.b, z13.b, z5.b
+ // vl128 state = 0x129013d5
+ __ dci(0x4527a18e); // histseg z14.b, z12.b, z7.b
+ // vl128 state = 0xc6b207b7
+ __ dci(0x4521a18c); // histseg z12.b, z12.b, z1.b
+ // vl128 state = 0x03957bb5
+ __ dci(0x4524a18d); // histseg z13.b, z12.b, z4.b
+ // vl128 state = 0x379af1c6
+ __ dci(0x4524a125); // histseg z5.b, z9.b, z4.b
+ // vl128 state = 0x93c462cc
+ __ dci(0x4522a127); // histseg z7.b, z9.b, z2.b
+ // vl128 state = 0xc95cb1a9
+ __ dci(0x4532a117); // histseg z23.b, z8.b, z18.b
+ // vl128 state = 0xc50e4e66
+ __ dci(0x4533a15f); // histseg z31.b, z10.b, z19.b
+ // vl128 state = 0x76663e3e
+ __ dci(0x4533a14f); // histseg z15.b, z10.b, z19.b
+ // vl128 state = 0x84f5ca5f
+ __ dci(0x4533a0ce); // histseg z14.b, z6.b, z19.b
+ // vl128 state = 0x50d7de3d
+ __ dci(0x453ba1cc); // histseg z12.b, z14.b, z27.b
+ // vl128 state = 0x32e3b53f
+ __ dci(0x453ba0fc); // histseg z28.b, z7.b, z27.b
+ // vl128 state = 0x0a5d4180
+ __ dci(0x452ba2f4); // histseg z20.b, z23.b, z11.b
+ // vl128 state = 0x91b77585
+ __ dci(0x453ba2c4); // histseg z4.b, z22.b, z27.b
+ // vl128 state = 0x5cd0c690
+ __ dci(0x453ba2cc); // histseg z12.b, z22.b, z27.b
+ // vl128 state = 0xa6a5f749
+ __ dci(0x453ba1c8); // histseg z8.b, z14.b, z27.b
+ // vl128 state = 0xe5036937
+ __ dci(0x4529a1c9); // histseg z9.b, z14.b, z9.b
+ // vl128 state = 0x13c620c8
+ __ dci(0x4529a1a8); // histseg z8.b, z13.b, z9.b
+ // vl128 state = 0xbf71d421
+ __ dci(0x4521a198); // histseg z24.b, z12.b, z1.b
+ // vl128 state = 0xe01d1160
+ __ dci(0x4529a1ba); // histseg z26.b, z13.b, z9.b
+ // vl128 state = 0xaa1b29d6
+ __ dci(0x452fa1bb); // histseg z27.b, z13.b, z15.b
+ // vl128 state = 0x2f96bd61
+ __ dci(0x452fa0ff); // histseg z31.b, z7.b, z15.b
+ // vl128 state = 0x5aeb6bec
+ __ dci(0x4527a0de); // histseg z30.b, z6.b, z7.b
+ // vl128 state = 0xbcb1b299
+ __ dci(0x4525a1d6); // histseg z22.b, z14.b, z5.b
+ // vl128 state = 0x0f89ea9b
+ __ dci(0x4525a1d7); // histseg z23.b, z14.b, z5.b
+ // vl128 state = 0xe40f30a2
+ __ dci(0x4521a3df); // histseg z31.b, z30.b, z1.b
+ // vl128 state = 0x342ff33b
+ __ dci(0x4521a197); // histseg z23.b, z12.b, z1.b
+ // vl128 state = 0xdfa92902
+ __ dci(0x4521a187); // histseg z7.b, z12.b, z1.b
+ // vl128 state = 0x8531fa67
+ __ dci(0x4535a186); // histseg z6.b, z12.b, z21.b
+ // vl128 state = 0xe4b55112
+ __ dci(0x4535a196); // histseg z22.b, z12.b, z21.b
+ // vl128 state = 0x5d26970e
+ __ dci(0x4525a097); // histseg z23.b, z4.b, z5.b
+ // vl128 state = 0x7dcb1d13
+ __ dci(0x4525a095); // histseg z21.b, z4.b, z5.b
+ // vl128 state = 0x5fb0789c
+ __ dci(0x452da017); // histseg z23.b, z0.b, z13.b
+ // vl128 state = 0x7f5df281
+ __ dci(0x452da295); // histseg z21.b, z20.b, z13.b
+ // vl128 state = 0x9e6f5eaf
+ __ dci(0x453da39d); // histseg z29.b, z28.b, z29.b
+ // vl128 state = 0x532f95a9
+ __ dci(0x453da39c); // histseg z28.b, z28.b, z29.b
+ // vl128 state = 0x64202514
+ __ dci(0x4535a29e); // histseg z30.b, z20.b, z21.b
+ // vl128 state = 0x44bda972
+ __ dci(0x4535a0bf); // histseg z31.b, z5.b, z21.b
+ // vl128 state = 0x258125d6
+ __ dci(0x4535a0bb); // histseg z27.b, z5.b, z21.b
+ // vl128 state = 0xec63caaf
+ __ dci(0x4537a2b3); // histseg z19.b, z21.b, z23.b
+ // vl128 state = 0xb937b6e8
+ __ dci(0x4525a2b1); // histseg z17.b, z21.b, z5.b
+ // vl128 state = 0x1515ee94
+ __ dci(0x4525a2b5); // histseg z21.b, z21.b, z5.b
+ // vl128 state = 0x4bb06873
+ __ dci(0x4525a0fd); // histseg z29.b, z7.b, z5.b
+ // vl128 state = 0x23446114
+ __ dci(0x4524a079); // histseg z25.b, z3.b, z4.b
+ // vl128 state = 0x48d52cf6
+ __ dci(0x4524a0d8); // histseg z24.b, z6.b, z4.b
+ // vl128 state = 0x0deef019
+ __ dci(0x452ca09c); // histseg z28.b, z4.b, z12.b
+ // vl128 state = 0xaba6e202
+ __ dci(0x453ca018); // histseg z24.b, z0.b, z28.b
+ // vl128 state = 0xee9d3eed
+ __ dci(0x4539a008); // histseg z8.b, z0.b, z25.b
+ // vl128 state = 0x254c57f3
+ __ dci(0x4539a00c); // histseg z12.b, z0.b, z25.b
+ // vl128 state = 0x28fea24d
+ __ dci(0x4531a048); // histseg z8.b, z2.b, z17.b
+ // vl128 state = 0xe32fcb53
+ __ dci(0x4530a0ca); // histseg z10.b, z6.b, z16.b
+ // vl128 state = 0xb3a9860b
+ __ dci(0x4520a0ee); // histseg z14.b, z7.b, z0.b
+ // vl128 state = 0xef9e57fa
+ __ dci(0x4520a1de); // histseg z30.b, z14.b, z0.b
+ // vl128 state = 0x295902e9
+ __ dci(0x4520a38e); // histseg z14.b, z28.b, z0.b
+ // vl128 state = 0x756ed318
+ __ dci(0x4528a30f); // histseg z15.b, z24.b, z8.b
+ // vl128 state = 0x8591dff9
+ __ dci(0x4538a39f); // histseg z31.b, z28.b, z24.b
+ // vl128 state = 0xe4ad535d
+ __ dci(0x4538a39b); // histseg z27.b, z28.b, z24.b
+ // vl128 state = 0x2d4fbc24
+ __ dci(0x4538a093); // histseg z19.b, z4.b, z24.b
+ // vl128 state = 0xd8ee932a
+ __ dci(0x453aa0a3); // histseg z3.b, z5.b, z26.b
+ // vl128 state = 0x768b71a6
+ __ dci(0x453aa0ab); // histseg z11.b, z5.b, z26.b
+ // vl128 state = 0xa78673d7
+ __ dci(0x452ea0bb); // histseg z27.b, z5.b, z14.b
+ // vl128 state = 0x6e649cae
+ __ dci(0x452fa1bf); // histseg z31.b, z13.b, z15.b
+ // vl128 state = 0x0f58100a
+ __ dci(0x452fa1be); // histseg z30.b, z13.b, z15.b
+ // vl128 state = 0xc99f4519
+ __ dci(0x452fa3f6); // histseg z22.b, z31.b, z15.b
+ // vl128 state = 0x700c8305
+ __ dci(0x452fa3f4); // histseg z20.b, z31.b, z15.b
+ // vl128 state = 0xbdecfddc
+ __ dci(0x453fa3b0); // histseg z16.b, z29.b, z31.b
+ // vl128 state = 0x3f5b7578
+ __ dci(0x453fa3b8); // histseg z24.b, z29.b, z31.b
+ // vl128 state = 0xf0076715
+ __ dci(0x453fa228); // histseg z8.b, z17.b, z31.b
+ // vl128 state = 0x3bd60e0b
+ __ dci(0x4536a22a); // histseg z10.b, z17.b, z22.b
+ // vl128 state = 0x1171f63c
+ __ dci(0x4530a23a); // histseg z26.b, z17.b, z16.b
+ // vl128 state = 0x3fef270c
+ __ dci(0x4522a23e); // histseg z30.b, z17.b, z2.b
+ // vl128 state = 0xf928721f
+ __ dci(0x4524a23c); // histseg z28.b, z17.b, z4.b
+ // vl128 state = 0xecec697b
+ __ dci(0x4527a238); // histseg z24.b, z17.b, z7.b
+ // vl128 state = 0x23b07b16
+ __ dci(0x4525a210); // histseg z16.b, z16.b, z5.b
+ // vl128 state = 0x9c1c2ac5
+ __ dci(0x4525a200); // histseg z0.b, z16.b, z5.b
+ // vl128 state = 0xc446f89b
+ __ dci(0x4520a202); // histseg z2.b, z16.b, z0.b
+ // vl128 state = 0x8afba046
+ __ dci(0x4521a303); // histseg z3.b, z24.b, z1.b
+ // vl128 state = 0xf0b0f9f3
+ __ dci(0x4520a201); // histseg z1.b, z16.b, z0.b
+ // vl128 state = 0x8922615b
+ __ dci(0x4528a223); // histseg z3.b, z17.b, z8.b
+ // vl128 state = 0xf36938ee
+ __ dci(0x4528a367); // histseg z7.b, z27.b, z8.b
+ // vl128 state = 0xc2d96c41
+ __ dci(0x452ca3e6); // histseg z6.b, z31.b, z12.b
+ // vl128 state = 0xf15e835f
+ __ dci(0x452ea3c4); // histseg z4.b, z30.b, z14.b
+ // vl128 state = 0xb3964bd8
+ __ dci(0x452da3c6); // histseg z6.b, z30.b, z13.b
+ // vl128 state = 0x8011a4c6
+ __ dci(0x452da0c4); // histseg z4.b, z6.b, z13.b
+ // vl128 state = 0x0fbedf54
+ __ dci(0x4529a0ec); // histseg z12.b, z7.b, z9.b
+ // vl128 state = 0x9a4d7031
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x9a4d7031,
+ 0xebaa80ad,
+ 0x702155a3,
+ 0x181fff8d,
+ 0x7b071373,
+ 0x1bf0af96,
+ 0x9ca15297,
+ 0x615d2f4a,
+ 0x7658b554,
+ 0xd2bf7319,
+ 0xddf8d492,
+ 0xf5938d08,
+ 0xbe354cb1,
+ 0xfe2d5d63,
+ 0x29818684,
+ 0x2c862ef9,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_table) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x05212a38); // tbl z24.b, {z17.b, z18.b}, z1.b
+ // vl128 state = 0xbdd1e1c1
+ __ dci(0x05212810); // tbl z16.b, {z0.b, z1.b}, z1.b
+ // vl128 state = 0x80ca38b6
+ __ dci(0x05e12812); // tbl z18.d, {z0.d, z1.d}, z1.d
+ // vl128 state = 0xb59fe024
+ __ dci(0x05632802); // tbl z2.h, {z0.h, z1.h}, z3.h
+ // vl128 state = 0xfb22b8f9
+ __ dci(0x05e32906); // tbl z6.d, {z8.d, z9.d}, z3.d
+ // vl128 state = 0x78ba34e9
+ __ dci(0x05e22942); // tbl z2.d, {z10.d, z11.d}, z2.d
+ // vl128 state = 0x000b006f
+ __ dci(0x05f22d46); // tbx z6.d, z10.d, z18.d
+ // vl128 state = 0x28b746e5
+ __ dci(0x05f32947); // tbl z7.d, {z10.d, z11.d}, z19.d
+ // vl128 state = 0xfcbf7b93
+ __ dci(0x05e32963); // tbl z3.d, {z11.d, z12.d}, z3.d
+ // vl128 state = 0x2891c0aa
+ __ dci(0x05e33161); // tbl z1.d, {z11.d}, z3.d
+ // vl128 state = 0x3468b9d4
+ __ dci(0x05e13149); // tbl z9.d, {z10.d}, z1.d
+ // vl128 state = 0xc2adf02b
+ __ dci(0x0560314d); // tbl z13.h, {z10.h}, z0.h
+ // vl128 state = 0xff9f1abb
+ __ dci(0x0578314c); // tbl z12.h, {z10.h}, z24.h
+ // vl128 state = 0x2cffcd38
+ __ dci(0x05e83144); // tbl z4.d, {z10.d}, z8.d
+ // vl128 state = 0x8e5ca010
+ __ dci(0x05e83146); // tbl z6.d, {z10.d}, z8.d
+ // vl128 state = 0xa6e0e69a
+ __ dci(0x05b83147); // tbl z7.s, {z10.s}, z24.s
+ // vl128 state = 0x513e6328
+ __ dci(0x053831d7); // tbl z23.b, {z14.b}, z24.b
+ // vl128 state = 0xe2bd7bdf
+ __ dci(0x056831df); // tbl z31.h, {z14.h}, z8.h
+ // vl128 state = 0xf4881e93
+ __ dci(0x0560319e); // tbl z30.h, {z12.h}, z0.h
+ // vl128 state = 0x4cd76275
+ __ dci(0x0522319a); // tbl z26.b, {z12.b}, z2.b
+ // vl128 state = 0x06d15ac3
+ __ dci(0x0522318a); // tbl z10.b, {z12.b}, z2.b
+ // vl128 state = 0x5657179b
+ __ dci(0x0522318e); // tbl z14.b, {z12.b}, z2.b
+ // vl128 state = 0x7def33b7
+ __ dci(0x05a6318a); // tbl z10.s, {z12.s}, z6.s
+ // vl128 state = 0x38ee6756
+ __ dci(0x05b2318b); // tbl z11.s, {z12.s}, z18.s
+ // vl128 state = 0x6ba1d599
+ __ dci(0x05a231bb); // tbl z27.s, {z13.s}, z2.s
+ // vl128 state = 0xee2c412e
+ __ dci(0x05a231ab); // tbl z11.s, {z13.s}, z2.s
+ // vl128 state = 0xa183e51b
+ __ dci(0x05a831af); // tbl z15.s, {z13.s}, z8.s
+ // vl128 state = 0xcd60a839
+ __ dci(0x05ea31a7); // tbl z7.d, {z13.d}, z10.d
+ // vl128 state = 0x3abe2d8b
+ __ dci(0x05fa33af); // tbl z15.d, {z29.d}, z26.d
+ // vl128 state = 0xf596f00c
+ __ dci(0x05fe32ae); // tbl z14.d, {z21.d}, z30.d
+ // vl128 state = 0x3e791a5a
+ __ dci(0x057a32be); // tbl z30.h, {z21.h}, z26.h
+ // vl128 state = 0x27f4086e
+ __ dci(0x05fe32ae); // tbl z14.d, {z21.d}, z30.d
+ // vl128 state = 0xec1be238
+ __ dci(0x05fe32aa); // tbl z10.d, {z21.d}, z30.d
+ // vl128 state = 0xa91ab6d9
+ __ dci(0x057e32e2); // tbl z2.h, {z23.h}, z30.h
+ // vl128 state = 0xd1ab825f
+ __ dci(0x057e32e0); // tbl z0.h, {z23.h}, z30.h
+ // vl128 state = 0xca42860c
+ __ dci(0x057f3270); // tbl z16.h, {z19.h}, z31.h
+ // vl128 state = 0xff27daa0
+ __ dci(0x05673271); // tbl z17.h, {z19.h}, z7.h
+ // vl128 state = 0x9b358bbf
+ __ dci(0x05e73379); // tbl z25.d, {z27.d}, z7.d
+ // vl128 state = 0xf0a4c65d
+ __ dci(0x05e3333d); // tbl z29.d, {z25.d}, z3.d
+ // vl128 state = 0x3de40d5b
+ __ dci(0x05e33335); // tbl z21.d, {z25.d}, z3.d
+ // vl128 state = 0xfeadc4fa
+ __ dci(0x05f33137); // tbl z23.d, {z9.d}, z19.d
+ // vl128 state = 0x417c23c2
+ __ dci(0x05b33336); // tbl z22.s, {z25.s}, z19.s
+ // vl128 state = 0x4bd7bddc
+ __ dci(0x05b1323e); // tbl z30.s, {z17.s}, z17.s
+ // vl128 state = 0x525aafe8
+ __ dci(0x05b0303c); // tbl z28.s, {z1.s}, z16.s
+ // vl128 state = 0xee67e295
+ __ dci(0x05b0308c); // tbl z12.s, {z4.s}, z16.s
+ // vl128 state = 0xce1a6811
+ __ dci(0x05b030e8); // tbl z8.s, {z7.s}, z16.s
+ // vl128 state = 0xfba53f74
+ __ dci(0x05a030b8); // tbl z24.s, {z5.s}, z0.s
+ // vl128 state = 0x56a69350
+ __ dci(0x05e830b0); // tbl z16.d, {z5.d}, z8.d
+ // vl128 state = 0xe0665941
+ __ dci(0x05e830b2); // tbl z18.d, {z5.d}, z8.d
+ // vl128 state = 0xc6680470
+ __ dci(0x05e931b3); // tbl z19.d, {z13.d}, z9.d
+ // vl128 state = 0x64a925a9
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x64a925a9,
+ 0x89750b9d,
+ 0xb803659e,
+ 0xa21efc63,
+ 0x67f967b8,
+ 0x4e52e209,
+ 0x42c1692f,
+ 0x4d8539c7,
+ 0x6828f0f4,
+ 0x3c75d27a,
+ 0x2e3341c9,
+ 0xfe4a8f4f,
+ 0xd27b47ae,
+ 0x665d8f8b,
+ 0x3230c584,
+ 0xcf1d6e82,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_cdot) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x4488104f); // cdot z15.s, z2.b, z8.b, #0
+ // vl128 state = 0x25fd51d1
+ __ dci(0x448a106e); // cdot z14.s, z3.b, z10.b, #0
+ // vl128 state = 0x490576d5
+ __ dci(0x448a1246); // cdot z6.s, z18.b, z10.b, #0
+ // vl128 state = 0x25a6fe4b
+ __ dci(0x448e12ce); // cdot z14.s, z22.b, z14.b, #0
+ // vl128 state = 0xc378b2df
+ __ dci(0x448412cf); // cdot z15.s, z22.b, z4.b, #0
+ // vl128 state = 0xe92a358d
+ __ dci(0x448412c7); // cdot z7.s, z22.b, z4.b, #0
+ // vl128 state = 0x7408b292
+ __ dci(0x44c41257); // cdot z23.d, z18.h, z4.h, #0
+ // vl128 state = 0xebc02289
+ __ dci(0x448412d5); // cdot z21.s, z22.b, z4.b, #0
+ // vl128 state = 0x9a7c2f1a
+ __ dci(0x448712d7); // cdot z23.s, z22.b, z7.b, #0
+ // vl128 state = 0xed91e0b4
+ __ dci(0x44831295); // cdot z21.s, z20.b, z3.b, #0
+ // vl128 state = 0x3dae4184
+ __ dci(0x44821385); // cdot z5.s, z28.b, z2.b, #0
+ // vl128 state = 0x213fb541
+ __ dci(0x44c213c1); // cdot z1.d, z30.h, z2.h, #0
+ // vl128 state = 0xcba3207a
+ __ dci(0x44c61340); // cdot z0.d, z26.h, z6.h, #0
+ // vl128 state = 0x9d6041f3
+ __ dci(0x44c413d0); // cdot z16.d, z30.h, z4.h, #0
+ // vl128 state = 0x4b931738
+ __ dci(0x44cc12d8); // cdot z24.d, z22.h, z12.h, #0
+ // vl128 state = 0x2503fbcc
+ __ dci(0x448c1ac8); // cdot z8.s, z22.b, z12.b, #180
+ // vl128 state = 0x53bc5303
+ __ dci(0x448c12ec); // cdot z12.s, z23.b, z12.b, #0
+ // vl128 state = 0xb3bf45c7
+ __ dci(0x448812ad); // cdot z13.s, z21.b, z8.b, #0
+ // vl128 state = 0x938b4e4f
+ __ dci(0x44881689); // cdot z9.s, z20.b, z8.b, #90
+ // vl128 state = 0x70106ddd
+ __ dci(0x4498128b); // cdot z11.s, z20.b, z24.b, #0
+ // vl128 state = 0x92108bb2
+ __ dci(0x4498129b); // cdot z27.s, z20.b, z24.b, #0
+ // vl128 state = 0x545230eb
+ __ dci(0x449a12bf); // cdot z31.s, z21.b, z26.b, #0
+ // vl128 state = 0x5cd2fb12
+ __ dci(0x44da10af); // cdot z15.d, z5.h, z26.h, #0
+ // vl128 state = 0xc03d9146
+ __ dci(0x44da10ae); // cdot z14.d, z5.h, z26.h, #0
+ // vl128 state = 0xbc2712f7
+ __ dci(0x44db12be); // cdot z30.d, z21.h, z27.h, #0
+ // vl128 state = 0xccf9d667
+ __ dci(0x449b12ee); // cdot z14.s, z23.b, z27.b, #0
+ // vl128 state = 0x2c1e08f1
+ __ dci(0x449b12ef); // cdot z15.s, z23.b, z27.b, #0
+ // vl128 state = 0x159d17d7
+ __ dci(0x449b14ee); // cdot z14.s, z7.b, z27.b, #90
+ // vl128 state = 0x892c97d3
+ __ dci(0x449b1cac); // cdot z12.s, z5.b, z27.b, #270
+ // vl128 state = 0x3841ce24
+ __ dci(0x449b1aae); // cdot z14.s, z21.b, z27.b, #180
+ // vl128 state = 0x30a24868
+ __ dci(0x449a1aec); // cdot z12.s, z23.b, z26.b, #180
+ // vl128 state = 0x2b836c8a
+ __ dci(0x44981ace); // cdot z14.s, z22.b, z24.b, #180
+ // vl128 state = 0x16a81963
+ __ dci(0x44901a86); // cdot z6.s, z20.b, z16.b, #180
+ // vl128 state = 0x924ac9ee
+ __ dci(0x44981b8e); // cdot z14.s, z28.b, z24.b, #180
+ // vl128 state = 0x3953da61
+ __ dci(0x44891b8a); // cdot z10.s, z28.b, z9.b, #180
+ // vl128 state = 0xad72b6d5
+ __ dci(0x4499138b); // cdot z11.s, z28.b, z25.b, #0
+ // vl128 state = 0x569b1b2c
+ __ dci(0x4498119b); // cdot z27.s, z12.b, z24.b, #0
+ // vl128 state = 0xdbb36925
+ __ dci(0x449c199a); // cdot z26.s, z12.b, z28.b, #180
+ // vl128 state = 0x4be861d1
+ __ dci(0x44901992); // cdot z18.s, z12.b, z16.b, #180
+ // vl128 state = 0x1e83ddb5
+ __ dci(0x44901a90); // cdot z16.s, z20.b, z16.b, #180
+ // vl128 state = 0x180556e0
+ __ dci(0x44911ac0); // cdot z0.s, z22.b, z17.b, #180
+ // vl128 state = 0x2cbf5db5
+ __ dci(0x44951bc1); // cdot z1.s, z30.b, z21.b, #180
+ // vl128 state = 0x428f97bd
+ __ dci(0x44851b40); // cdot z0.s, z26.b, z5.b, #180
+ // vl128 state = 0xe0f0659f
+ __ dci(0x44851a70); // cdot z16.s, z19.b, z5.b, #180
+ // vl128 state = 0x4142d23c
+ __ dci(0x44861a74); // cdot z20.s, z19.b, z6.b, #180
+ // vl128 state = 0x74f7d373
+ __ dci(0x44921a76); // cdot z22.s, z19.b, z18.b, #180
+ // vl128 state = 0x5b4ef670
+ __ dci(0x44921246); // cdot z6.s, z18.b, z18.b, #0
+ // vl128 state = 0x1fe5d31d
+ __ dci(0x44981247); // cdot z7.s, z18.b, z24.b, #0
+ // vl128 state = 0x782a0559
+ __ dci(0x44981746); // cdot z6.s, z26.b, z24.b, #90
+ // vl128 state = 0x84cbc61d
+ __ dci(0x449816c4); // cdot z4.s, z22.b, z24.b, #90
+ // vl128 state = 0x078aa009
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x078aa009,
+ 0x3c4026df,
+ 0x3ae8e644,
+ 0x514dfdcd,
+ 0x2649444a,
+ 0x74a87bbe,
+ 0x14b8e9b3,
+ 0x92c65f4d,
+ 0xa3015fc1,
+ 0xab48b8fa,
+ 0x9e80ef05,
+ 0xb59b0dde,
+ 0xbcf04e6f,
+ 0xa7fa54a1,
+ 0xaed81dfc,
+ 0xdc7ffb07,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_bitwise_ternary) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x04793f99); // bsl1n z25.d, z25.d, z25.d, z28.d
+ // vl128 state = 0x70294e62
+ __ dci(0x04b93f9b); // bsl2n z27.d, z27.d, z25.d, z28.d
+ // vl128 state = 0x0a3f0dc1
+ __ dci(0x04b93f93); // bsl2n z19.d, z19.d, z25.d, z28.d
+ // vl128 state = 0x46500e35
+ __ dci(0x04b93dbb); // bsl2n z27.d, z27.d, z25.d, z13.d
+ // vl128 state = 0x25bdcc83
+ __ dci(0x04b53db9); // bsl2n z25.d, z25.d, z21.d, z13.d
+ // vl128 state = 0x6d33b943
+ __ dci(0x04bd3d29); // bsl2n z9.d, z9.d, z29.d, z9.d
+ // vl128 state = 0xa218e11a
+ __ dci(0x04ad3d0d); // bsl2n z13.d, z13.d, z13.d, z8.d
+ // vl128 state = 0xc5e2f5a2
+ __ dci(0x04a53d4f); // bsl2n z15.d, z15.d, z5.d, z10.d
+ // vl128 state = 0x519e4735
+ __ dci(0x04653d47); // bsl1n z7.d, z7.d, z5.d, z10.d
+ // vl128 state = 0x132f7ce6
+ __ dci(0x04613dc6); // bsl1n z6.d, z6.d, z1.d, z14.d
+ // vl128 state = 0x91bcf19b
+ __ dci(0x04673dc7); // bsl1n z7.d, z7.d, z7.d, z14.d
+ // vl128 state = 0x3bd0ba20
+ __ dci(0x04673dc5); // bsl1n z5.d, z5.d, z7.d, z14.d
+ // vl128 state = 0xbf3b39fa
+ __ dci(0x04e73cc1); // nbsl z1.d, z1.d, z7.d, z6.d
+ // vl128 state = 0xd304b643
+ __ dci(0x04773cc5); // bsl1n z5.d, z5.d, z23.d, z6.d
+ // vl128 state = 0xdd6cd3ce
+ __ dci(0x04773ac1); // bcax z1.d, z1.d, z23.d, z22.d
+ // vl128 state = 0x3f456acf
+ __ dci(0x04773ac3); // bcax z3.d, z3.d, z23.d, z22.d
+ // vl128 state = 0xbe117f80
+ __ dci(0x047739c7); // bcax z7.d, z7.d, z23.d, z14.d
+ // vl128 state = 0xd3cd3dcd
+ __ dci(0x047439c5); // bcax z5.d, z5.d, z20.d, z14.d
+ // vl128 state = 0xee4f636d
+ __ dci(0x04743841); // bcax z1.d, z1.d, z20.d, z2.d
+ // vl128 state = 0xf21b00a1
+ __ dci(0x04753811); // bcax z17.d, z17.d, z21.d, z0.d
+ // vl128 state = 0x597ab14d
+ __ dci(0x04753815); // bcax z21.d, z21.d, z21.d, z0.d
+ // vl128 state = 0xf5d56322
+ __ dci(0x04713917); // bcax z23.d, z23.d, z17.d, z8.d
+ // vl128 state = 0x17f3cedf
+ __ dci(0x04793987); // bcax z7.d, z7.d, z25.d, z12.d
+ // vl128 state = 0x7492c4e5
+ __ dci(0x04693885); // bcax z5.d, z5.d, z9.d, z4.d
+ // vl128 state = 0xb796548c
+ __ dci(0x046838d5); // bcax z21.d, z21.d, z8.d, z6.d
+ // vl128 state = 0xf4e12422
+ __ dci(0x046838d4); // bcax z20.d, z20.d, z8.d, z6.d
+ // vl128 state = 0x16187a4c
+ __ dci(0x043838d6); // eor3 z22.d, z22.d, z24.d, z6.d
+ // vl128 state = 0xd95e6713
+ __ dci(0x043c39de); // eor3 z30.d, z30.d, z28.d, z14.d
+ // vl128 state = 0xb8322807
+ __ dci(0x047c38ce); // bcax z14.d, z14.d, z28.d, z6.d
+ // vl128 state = 0x6871619d
+ __ dci(0x047c38cf); // bcax z15.d, z15.d, z28.d, z6.d
+ // vl128 state = 0x57c5a4af
+ __ dci(0x043c384e); // eor3 z14.d, z14.d, z28.d, z2.d
+ // vl128 state = 0x1a62efdf
+ __ dci(0x0474385e); // bcax z30.d, z30.d, z20.d, z2.d
+ // vl128 state = 0xc9d1ea1e
+ __ dci(0x047c3a4e); // bcax z14.d, z14.d, z28.d, z18.d
+ // vl128 state = 0xd5ced43e
+ __ dci(0x047c3c4f); // bsl1n z15.d, z15.d, z28.d, z2.d
+ // vl128 state = 0x79f22e16
+ __ dci(0x047d3d4b); // bsl1n z11.d, z11.d, z29.d, z10.d
+ // vl128 state = 0xc4ee5d6e
+ __ dci(0x04793c49); // bsl1n z9.d, z9.d, z25.d, z2.d
+ // vl128 state = 0xea11e840
+ __ dci(0x04793c99); // bsl1n z25.d, z25.d, z25.d, z4.d
+ // vl128 state = 0x95221bc2
+ __ dci(0x04613c91); // bsl1n z17.d, z17.d, z1.d, z4.d
+ // vl128 state = 0xa40acfbe
+ __ dci(0x04233c90); // bsl z16.d, z16.d, z3.d, z4.d
+ // vl128 state = 0x8d3ef22f
+ __ dci(0x04233c80); // bsl z0.d, z0.d, z3.d, z4.d
+ // vl128 state = 0xd07d1bb2
+ __ dci(0x04223ca4); // bsl z4.d, z4.d, z2.d, z5.d
+ // vl128 state = 0xa2c4169c
+ __ dci(0x04223ca5); // bsl z5.d, z5.d, z2.d, z5.d
+ // vl128 state = 0x3c6415e5
+ __ dci(0x04a03ca1); // bsl2n z1.d, z1.d, z0.d, z5.d
+ // vl128 state = 0x55b93add
+ __ dci(0x04a03cb1); // bsl2n z17.d, z17.d, z0.d, z5.d
+ // vl128 state = 0x9b86e5b3
+ __ dci(0x04a13cf9); // bsl2n z25.d, z25.d, z1.d, z7.d
+ // vl128 state = 0xdd310e8f
+ __ dci(0x04a13cfd); // bsl2n z29.d, z29.d, z1.d, z7.d
+ // vl128 state = 0xae66fb44
+ __ dci(0x04a13ced); // bsl2n z13.d, z13.d, z1.d, z7.d
+ // vl128 state = 0xc69dd926
+ __ dci(0x04b93ce9); // bsl2n z9.d, z9.d, z25.d, z7.d
+ // vl128 state = 0x15592b37
+ __ dci(0x04b93dcb); // bsl2n z11.d, z11.d, z25.d, z14.d
+ // vl128 state = 0xbfcda4d3
+ __ dci(0x04b83d4f); // bsl2n z15.d, z15.d, z24.d, z10.d
+ // vl128 state = 0xaef1e0b6
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0xaef1e0b6,
+ 0xc9b3303f,
+ 0xc547c948,
+ 0x0fc817f7,
+ 0x22d2eab3,
+ 0x225b3ecd,
+ 0xf7a34a06,
+ 0xa07e68ed,
+ 0xdba0f9fa,
+ 0x64199691,
+ 0xa650bfa3,
+ 0xc6bfeab9,
+ 0x7efe63c4,
+ 0x66e4139c,
+ 0xc580dcf5,
+ 0x95687693,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_while) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x257109e3); // whilehs p3.h, w15, w17
+ // vl128 state = 0x4568cc4c
+ __ dci(0x257709f3); // whilehi p3.h, w15, w23
+ // vl128 state = 0xf148a8ac
+ __ dci(0x25f509f7); // whilehi p7.d, w15, w21
+ // vl128 state = 0x2fe3dcb9
+ __ dci(0x257508f5); // whilehi p5.h, w7, w21
+ // vl128 state = 0x88429dee
+ __ dci(0x257100f1); // whilegt p1.h, w7, w17
+ // vl128 state = 0x5a3b89ec
+ __ dci(0x253108f0); // whilehi p0.b, w7, w17
+ // vl128 state = 0x73276c52
+ __ dci(0x253108f1); // whilehi p1.b, w7, w17
+ // vl128 state = 0xa278d7f0
+ __ dci(0x257508f9); // whilehi p9.h, w7, w21
+ // vl128 state = 0xa438aefc
+ __ dci(0x25750858); // whilehi p8.h, w2, w21
+ // vl128 state = 0x33e13c17
+ __ dci(0x25770a50); // whilehi p0.h, w18, w23
+ // vl128 state = 0x01947abe
+ __ dci(0x25751a52); // whilehi p2.h, x18, x21
+ // vl128 state = 0x2cf410f2
+ __ dci(0x25711a7a); // whilehi p10.h, x19, x17
+ // vl128 state = 0x4bb6efc1
+ __ dci(0x25391a78); // whilehi p8.b, x19, x25
+ // vl128 state = 0xec1afdd6
+ __ dci(0x25290a70); // whilehi p0.b, w19, w9
+ // vl128 state = 0xde6fbb7f
+ __ dci(0x25290a78); // whilehi p8.b, w19, w9
+ // vl128 state = 0x79c3a968
+ __ dci(0x25a90b68); // whilehs p8.s, w27, w9
+ // vl128 state = 0x4b32e81a
+ __ dci(0x25a903e9); // whilege p9.s, wzr, w9
+ // vl128 state = 0x994bfc18
+ __ dci(0x25a909ed); // whilehs p13.s, w15, w9
+ // vl128 state = 0x6d6e231f
+ __ dci(0x25a909ef); // whilehs p15.s, w15, w9
+ // vl128 state = 0x41945298
+ __ dci(0x25a909eb); // whilehs p11.s, w15, w9
+ // vl128 state = 0x659ccb75
+ __ dci(0x25b909c9); // whilehs p9.s, w14, w25
+ // vl128 state = 0xd078a7ed
+ __ dci(0x25bd098d); // whilehs p13.s, w12, w29
+ // vl128 state = 0xf6f2d8ae
+ __ dci(0x25b90909); // whilehs p9.s, w8, w25
+ // vl128 state = 0x248bccac
+ __ dci(0x25fb090b); // whilehs p11.d, w8, w27
+ // vl128 state = 0x09b0b9cc
+ __ dci(0x25fb090a); // whilehs p10.d, w8, w27
+ // vl128 state = 0xfa811fef
+ __ dci(0x25eb0b02); // whilehs p2.d, w24, w11
+ // vl128 state = 0xdcb96f30
+ __ dci(0x25eb0bc3); // whilehs p3.d, w30, w11
+ // vl128 state = 0xbae01fd2
+ __ dci(0x25e30acb); // whilehs p11.d, w22, w3
+ // vl128 state = 0xbcfdc2b8
+ __ dci(0x25eb08c9); // whilehs p9.d, w6, w11
+ // vl128 state = 0xdb60ba22
+ __ dci(0x25a308c1); // whilehs p1.s, w6, w3
+ // vl128 state = 0xe895df80
+ __ dci(0x25a108e5); // whilehs p5.s, w7, w1
+ // vl128 state = 0x3aeccb82
+ __ dci(0x25a009e4); // whilehs p4.s, w15, w0
+ // vl128 state = 0xe6b1b3b3
+ __ dci(0x25a009ec); // whilehs p12.s, w15, w0
+ // vl128 state = 0xd2e10d82
+ __ dci(0x25a019ae); // whilehs p14.s, x13, x0
+ // vl128 state = 0x4bf596b8
+ __ dci(0x25e018af); // whilehs p15.d, x5, x0
+ // vl128 state = 0xb8d27541
+ __ dci(0x25e918ad); // whilehs p13.d, x5, x9
+ // vl128 state = 0x01b6f92f
+ __ dci(0x25eb188c); // whilehs p12.d, x4, x11
+ // vl128 state = 0xd3cfed2d
+ __ dci(0x25eb188e); // whilehs p14.d, x4, x11
+ // vl128 state = 0x9947e07e
+ __ dci(0x25e21886); // whilehs p6.d, x4, x2
+ // vl128 state = 0xd9995e11
+ __ dci(0x25a21084); // whilege p4.s, x4, x2
+ // vl128 state = 0xd45d81ed
+ __ dci(0x25b31085); // whilege p5.s, x4, x19
+ // vl128 state = 0x4d67b543
+ __ dci(0x25a3100d); // whilege p13.s, x0, x3
+ // vl128 state = 0x00f0526c
+ __ dci(0x252b101d); // whilegt p13.b, x0, x11
+ // vl128 state = 0x9d176025
+ __ dci(0x253b1095); // whilegt p5.b, x4, x27
+ // vl128 state = 0xd6544089
+ __ dci(0x253b1091); // whilegt p1.b, x4, x27
+ // vl128 state = 0x37d83129
+ __ dci(0x253f10d5); // whilegt p5.b, x6, xzr
+ // vl128 state = 0x8e121615
+ __ dci(0x252f11d4); // whilegt p4.b, x14, x15
+ // vl128 state = 0x83d6c9e9
+ __ dci(0x25af01d5); // whilegt p5.s, w14, w15
+ // vl128 state = 0xe865fad7
+ __ dci(0x25eb01c5); // whilege p5.d, w14, w11
+ // vl128 state = 0x5eaf208e
+ __ dci(0x25fb0144); // whilege p4.d, w10, w27
+ // vl128 state = 0x8cd6348c
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x8cd6348c,
+ 0x42a1f9b4,
+ 0x13fc2001,
+ 0x492cb2ac,
+ 0xa67cfb65,
+ 0x80d4639f,
+ 0xfa388a09,
+ 0x8c7ad8d9,
+ 0x299c5bfe,
+ 0x9183808a,
+ 0x3fc14d86,
+ 0x7cc08a05,
+ 0x9c85cd48,
+ 0xd06e8299,
+ 0x6a107152,
+ 0x81d99d7c,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random regression test for SVE2 CDOT (complex integer dot product,
+// indexed form). Machine-generated: it emits a fixed sequence of raw
+// instruction encodings via dci() -- each annotated with its disassembly --
+// then hashes the resulting machine state and compares it against a
+// precomputed value for the current vector length. The "vl128 state"
+// comments record the running hash after each instruction for a 128-bit VL,
+// to help localise a divergence when debugging.
+TEST_SVE(sve2_cdot_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44bb4ef6); // cdot z22.s, z23.b, z3.b[3], #270
+ // vl128 state = 0x452d1d6e
+ __ dci(0x44b94ff7); // cdot z23.s, z31.b, z1.b[3], #270
+ // vl128 state = 0x546c9569
+ __ dci(0x44b94dd5); // cdot z21.s, z14.b, z1.b[3], #270
+ // vl128 state = 0xa2abf834
+ __ dci(0x44bd45d7); // cdot z23.s, z14.b, z5.b[3], #90
+ // vl128 state = 0xba77ed64
+ __ dci(0x44fc45df); // cdot z31.d, z14.h, z12.h[1], #90
+ // vl128 state = 0xe78163f2
+ __ dci(0x44f441db); // cdot z27.d, z14.h, z4.h[1], #0
+ // vl128 state = 0xca3b116d
+ __ dci(0x44f44dd3); // cdot z19.d, z14.h, z4.h[1], #270
+ // vl128 state = 0x57ba3771
+ __ dci(0x44b44d83); // cdot z3.s, z12.b, z4.b[2], #270
+ // vl128 state = 0x4edccb88
+ __ dci(0x44ac4d82); // cdot z2.s, z12.b, z4.b[1], #270
+ // vl128 state = 0xc9543499
+ __ dci(0x44a84f8a); // cdot z10.s, z28.b, z0.b[1], #270
+ // vl128 state = 0x9d8fe439
+ __ dci(0x44a84d08); // cdot z8.s, z8.b, z0.b[1], #270
+ // vl128 state = 0x3c1bf0cc
+ __ dci(0x44ba4d09); // cdot z9.s, z8.b, z2.b[3], #270
+ // vl128 state = 0x983716f1
+ __ dci(0x44ea4d0d); // cdot z13.d, z8.h, z10.h[0], #270
+ // vl128 state = 0x2df96300
+ __ dci(0x44eb491d); // cdot z29.d, z8.h, z11.h[0], #180
+ // vl128 state = 0xc23edde3
+ __ dci(0x44e9499f); // cdot z31.d, z12.h, z9.h[0], #180
+ // vl128 state = 0xef0ace9d
+ __ dci(0x44e84b9d); // cdot z29.d, z28.h, z8.h[0], #180
+ // vl128 state = 0x2cce8002
+ __ dci(0x44e84b99); // cdot z25.d, z28.h, z8.h[0], #180
+ // vl128 state = 0xd07f46a1
+ __ dci(0x44f84a9d); // cdot z29.d, z20.h, z8.h[1], #180
+ // vl128 state = 0x239831e8
+ __ dci(0x44f84a99); // cdot z25.d, z20.h, z8.h[1], #180
+ // vl128 state = 0xa110988d
+ __ dci(0x44e84a09); // cdot z9.d, z16.h, z8.h[0], #180
+ // vl128 state = 0x2b9ef292
+ __ dci(0x44e84a19); // cdot z25.d, z16.h, z8.h[0], #180
+ // vl128 state = 0x50eeb818
+ __ dci(0x44e04b1b); // cdot z27.d, z24.h, z0.h[0], #180
+ // vl128 state = 0xc33ce03b
+ __ dci(0x44e04a2b); // cdot z11.d, z17.h, z0.h[0], #180
+ // vl128 state = 0xe163b5c9
+ __ dci(0x44e04b0f); // cdot z15.d, z24.h, z0.h[0], #180
+ // vl128 state = 0x052a34eb
+ __ dci(0x44e04b1f); // cdot z31.d, z24.h, z0.h[0], #180
+ // vl128 state = 0x0660afb4
+ __ dci(0x44e84b4f); // cdot z15.d, z26.h, z8.h[0], #180
+ // vl128 state = 0x0ae01233
+ __ dci(0x44ee4b4e); // cdot z14.d, z26.h, z14.h[0], #180
+ // vl128 state = 0xde7bdd15
+ __ dci(0x44ae4b7e); // cdot z30.s, z27.b, z6.b[1], #180
+ // vl128 state = 0x758973a1
+ __ dci(0x44a6497f); // cdot z31.s, z11.b, z6.b[0], #180
+ // vl128 state = 0xb3c5df37
+ __ dci(0x44a64df7); // cdot z23.s, z15.b, z6.b[0], #270
+ // vl128 state = 0xe652f054
+ __ dci(0x44a64c73); // cdot z19.s, z3.b, z6.b[0], #270
+ // vl128 state = 0xc4b58041
+ __ dci(0x44a64de3); // cdot z3.s, z15.b, z6.b[0], #270
+ // vl128 state = 0x1239ca90
+ __ dci(0x44a749e2); // cdot z2.s, z15.b, z7.b[0], #180
+ // vl128 state = 0x4a01cdcb
+ __ dci(0x44a740e0); // cdot z0.s, z7.b, z7.b[0], #0
+ // vl128 state = 0x604e45cf
+ __ dci(0x44a344e2); // cdot z2.s, z7.b, z3.b[0], #90
+ // vl128 state = 0x12fe2972
+ __ dci(0x44a34ca3); // cdot z3.s, z5.b, z3.b[0], #270
+ // vl128 state = 0x78e0bb2e
+ __ dci(0x44e14cb3); // cdot z19.d, z5.h, z1.h[0], #270
+ // vl128 state = 0xe3a69b46
+ __ dci(0x44e14d31); // cdot z17.d, z9.h, z1.h[0], #270
+ // vl128 state = 0xe6b58aa4
+ __ dci(0x44f14d01); // cdot z1.d, z8.h, z1.h[1], #270
+ // vl128 state = 0xffcfb597
+ __ dci(0x44f14551); // cdot z17.d, z10.h, z1.h[1], #90
+ // vl128 state = 0x2745934b
+ __ dci(0x44f345d5); // cdot z21.d, z14.h, z3.h[1], #90
+ // vl128 state = 0xa38b5571
+ __ dci(0x44f34574); // cdot z20.d, z11.h, z3.h[1], #90
+ // vl128 state = 0x978afd92
+ __ dci(0x44f34576); // cdot z22.d, z11.h, z3.h[1], #90
+ // vl128 state = 0x9f1b19c9
+ __ dci(0x44f34f77); // cdot z23.d, z27.h, z3.h[1], #270
+ // vl128 state = 0x61a31d64
+ __ dci(0x44f24f5f); // cdot z31.d, z26.h, z2.h[1], #270
+ // vl128 state = 0x1e71023e
+ __ dci(0x44fa4fcf); // cdot z15.d, z30.h, z10.h[1], #270
+ // vl128 state = 0xdbe5ffb3
+ __ dci(0x44ba4f4e); // cdot z14.s, z26.b, z2.b[3], #270
+ // vl128 state = 0x51390e81
+ __ dci(0x44ba470c); // cdot z12.s, z24.b, z2.b[3], #90
+ // vl128 state = 0x59ad5198
+ __ dci(0x44b2479c); // cdot z28.s, z28.b, z2.b[2], #90
+ // vl128 state = 0xe997de49
+ __ dci(0x44b24fbd); // cdot z29.s, z29.b, z2.b[2], #270
+ // vl128 state = 0x5533cefa
+ }
+
+ // Hash the machine state produced above and load it into w0 so the
+ // harness can compare it with the expected value for this VL.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: entry [i] corresponds to
+ // VL = (i + 1) * 128 bits, selected via the Q-register lane count below.
+ // Note entry [0] matches the final "vl128 state" value above.
+ uint32_t expected_hashes[] = {
+ 0x5533cefa,
+ 0x1462a298,
+ 0x1acb4ead,
+ 0xeb05ddf0,
+ 0x23fe8c86,
+ 0xbb1e9f8c,
+ 0x4a933f43,
+ 0x4cd64b55,
+ 0x84a4b8b7,
+ 0x52019619,
+ 0x4442432b,
+ 0x9b353ce8,
+ 0x333c9eef,
+ 0x291eac87,
+ 0x110f7371,
+ 0x009b25cb,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random regression test for the SVE2 constructive SPLICE
+// (two-register) form. Machine-generated: emits a fixed sequence of raw
+// encodings via dci() -- each annotated with its disassembly -- then hashes
+// the resulting machine state and compares it against a precomputed value
+// for the current vector length. The "vl128 state" comments record the
+// running hash after each instruction for a 128-bit VL, to help localise a
+// divergence when debugging.
+TEST_SVE(sve2_splice) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x05ed89a7); // splice z7.d, p2, {z13.d, z14.d}
+ // vl128 state = 0x6acff994
+ __ dci(0x05ed81e5); // splice z5.d, p0, {z15.d, z16.d}
+ // vl128 state = 0x2c8b3e5d
+ __ dci(0x05ed8375); // splice z21.d, p0, {z27.d, z28.d}
+ // vl128 state = 0x2588e208
+ __ dci(0x05ed9174); // splice z20.d, p4, {z11.d, z12.d}
+ // vl128 state = 0x4d6fa6b3
+ __ dci(0x056d91f6); // splice z22.h, p4, {z15.h, z16.h}
+ // vl128 state = 0x9f00a308
+ __ dci(0x056d92f2); // splice z18.h, p4, {z23.h, z24.h}
+ // vl128 state = 0x5479cc74
+ __ dci(0x056d96a2); // splice z2.h, p5, {z21.h, z22.h}
+ // vl128 state = 0xca7a6a63
+ __ dci(0x056d9fa6); // splice z6.h, p7, {z29.h, z30.h}
+ // vl128 state = 0x007fc934
+ __ dci(0x056d9be4); // splice z4.h, p6, {z31.h, z0.h}
+ // vl128 state = 0x8186741b
+ __ dci(0x056d97ec); // splice z12.h, p5, {z31.h, z0.h}
+ // vl128 state = 0x26ab76b9
+ __ dci(0x056d979c); // splice z28.h, p5, {z28.h, z29.h}
+ // vl128 state = 0x933201f4
+ __ dci(0x056d9794); // splice z20.h, p5, {z28.h, z29.h}
+ // vl128 state = 0x42cf6784
+ __ dci(0x052d9f96); // splice z22.b, p7, {z28.b, z29.b}
+ // vl128 state = 0x0838e776
+ __ dci(0x056d8f9e); // splice z30.h, p3, {z28.h, z29.h}
+ // vl128 state = 0x89637e78
+ __ dci(0x056d9fd6); // splice z22.h, p7, {z30.h, z31.h}
+ // vl128 state = 0xb94dbb49
+ __ dci(0x056d8dd7); // splice z23.h, p3, {z14.h, z15.h}
+ // vl128 state = 0x260f8127
+ __ dci(0x05ad8ddf); // splice z31.s, p3, {z14.s, z15.s}
+ // vl128 state = 0x16257a12
+ __ dci(0x05ad8ddd); // splice z29.s, p3, {z14.s, z15.s}
+ // vl128 state = 0x803d0766
+ __ dci(0x05ad8d7c); // splice z28.s, p3, {z11.s, z12.s}
+ // vl128 state = 0xcc405331
+ __ dci(0x05ad8d74); // splice z20.s, p3, {z11.s, z12.s}
+ // vl128 state = 0x0ed25e4c
+ __ dci(0x05ad8d64); // splice z4.s, p3, {z11.s, z12.s}
+ // vl128 state = 0x167daf8b
+ __ dci(0x05ed8c6c); // splice z12.d, p3, {z3.d, z4.d}
+ // vl128 state = 0x435f3bb9
+ __ dci(0x05ed8cad); // splice z13.d, p3, {z5.d, z6.d}
+ // vl128 state = 0xe49df619
+ __ dci(0x056d8dbd); // splice z29.h, p3, {z13.h, z14.h}
+ // vl128 state = 0x1f54e928
+ __ dci(0x056d8f2d); // splice z13.h, p3, {z25.h, z26.h}
+ // vl128 state = 0x24adbe77
+ __ dci(0x056d8f9d); // splice z29.h, p3, {z28.h, z29.h}
+ // vl128 state = 0xcc2ec3e6
+ __ dci(0x056d8f95); // splice z21.h, p3, {z28.h, z29.h}
+ // vl128 state = 0xb71c64f7
+ __ dci(0x056d8f34); // splice z20.h, p3, {z25.h, z26.h}
+ // vl128 state = 0xb32756f0
+ __ dci(0x05ed8f64); // splice z4.d, p3, {z27.d, z28.d}
+ // vl128 state = 0x3f7d1f13
+ __ dci(0x05ad8e60); // splice z0.s, p3, {z19.s, z20.s}
+ // vl128 state = 0x9a7ffbde
+ __ dci(0x052d8e50); // splice z16.b, p3, {z18.b, z19.b}
+ // vl128 state = 0x5c82ed17
+ __ dci(0x052d9652); // splice z18.b, p5, {z18.b, z19.b}
+ // vl128 state = 0x28b9cd60
+ __ dci(0x052d9ed0); // splice z16.b, p7, {z22.b, z23.b}
+ // vl128 state = 0xab0238ba
+ __ dci(0x052d9ed4); // splice z20.b, p7, {z22.b, z23.b}
+ // vl128 state = 0x9f0e0ef9
+ __ dci(0x056d9cc4); // splice z4.h, p7, {z6.h, z7.h}
+ // vl128 state = 0xec31d5e7
+ __ dci(0x056d98e6); // splice z6.h, p6, {z7.h, z8.h}
+ // vl128 state = 0xbc9c0048
+ __ dci(0x056d9ee4); // splice z4.h, p7, {z23.h, z24.h}
+ // vl128 state = 0xe2e9c9a3
+ __ dci(0x056d9ef4); // splice z20.h, p7, {z23.h, z24.h}
+ // vl128 state = 0x60ffa98a
+ __ dci(0x056d9ab6); // splice z22.h, p6, {z21.h, z22.h}
+ // vl128 state = 0xae70ed0f
+ __ dci(0x056d9294); // splice z20.h, p4, {z20.h, z21.h}
+ // vl128 state = 0x5736c563
+ __ dci(0x056d9284); // splice z4.h, p4, {z20.h, z21.h}
+ // vl128 state = 0xf31dd2d9
+ __ dci(0x052d920c); // splice z12.b, p4, {z16.b, z17.b}
+ // vl128 state = 0x04502fea
+ __ dci(0x052d921c); // splice z28.b, p4, {z16.b, z17.b}
+ // vl128 state = 0x852f98b1
+ __ dci(0x052d9094); // splice z20.b, p4, {z4.b, z5.b}
+ // vl128 state = 0xb40c5931
+ __ dci(0x052d90f6); // splice z22.b, p4, {z7.b, z8.b}
+ // vl128 state = 0x64d6138d
+ __ dci(0x052d88e6); // splice z6.b, p2, {z7.b, z8.b}
+ // vl128 state = 0x51bb6564
+ __ dci(0x052d88e4); // splice z4.b, p2, {z7.b, z8.b}
+ // vl128 state = 0x7ed599b0
+ __ dci(0x05ad8865); // splice z5.s, p2, {z3.s, z4.s}
+ // vl128 state = 0xa201547d
+ __ dci(0x05ad9961); // splice z1.s, p6, {z11.s, z12.s}
+ // vl128 state = 0x9508f19c
+ __ dci(0x05ed9945); // splice z5.d, p6, {z10.d, z11.d}
+ // vl128 state = 0x95399cfd
+ }
+
+ // Hash the machine state produced above and load it into w0 so the
+ // harness can compare it with the expected value for this VL.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: entry [i] corresponds to
+ // VL = (i + 1) * 128 bits, selected via the Q-register lane count below.
+ // Note entry [0] matches the final "vl128 state" value above.
+ uint32_t expected_hashes[] = {
+ 0x95399cfd,
+ 0xa960b01e,
+ 0x1fedaa18,
+ 0xe2fd3ec3,
+ 0x3edc353b,
+ 0xd809efd8,
+ 0x2a04f527,
+ 0xe4b9bb4a,
+ 0x72e5ed3e,
+ 0x63d6fe93,
+ 0xd2ad18fa,
+ 0x522fe057,
+ 0xc7ba2f7d,
+ 0x2dd44bd3,
+ 0x68b62ae6,
+ 0x06ea6854,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random regression test for the SVE2 WHILERW/WHILEWR
+// pointer-conflict predicate-generating instructions. Machine-generated:
+// emits a fixed sequence of raw encodings via dci() -- each annotated with
+// its disassembly -- then hashes the resulting machine state and compares it
+// against a precomputed value for the current vector length. The
+// "vl128 state" comments record the running hash after each instruction for
+// a 128-bit VL, to help localise a divergence when debugging.
+TEST_SVE(sve2_whilerw_whilewr) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x25ac3026); // whilewr p6.s, x1, x12
+ // vl128 state = 0x91e301ae
+ __ dci(0x25ac3024); // whilewr p4.s, x1, x12
+ // vl128 state = 0x9203b261
+ __ dci(0x25af3020); // whilewr p0.s, x1, x15
+ // vl128 state = 0x87505080
+ __ dci(0x25ef3222); // whilewr p2.d, x17, x15
+ // vl128 state = 0x4ba695cb
+ __ dci(0x25eb320a); // whilewr p10.d, x16, x11
+ // vl128 state = 0x5909d726
+ __ dci(0x25e33308); // whilewr p8.d, x24, x3
+ // vl128 state = 0x52766071
+ __ dci(0x25ea3309); // whilewr p9.d, x24, x10
+ // vl128 state = 0xe906a65a
+ __ dci(0x25aa3101); // whilewr p1.s, x8, x10
+ // vl128 state = 0xd9d56c58
+ __ dci(0x252b3100); // whilewr p0.b, x8, x11
+ // vl128 state = 0xcc868eb9
+ __ dci(0x252a3008); // whilewr p8.b, x0, x10
+ // vl128 state = 0xf78cb912
+ __ dci(0x2528304c); // whilewr p12.b, x2, x8
+ // vl128 state = 0x5493a6c4
+ __ dci(0x25203004); // whilewr p4.b, x0, x0
+ // vl128 state = 0xb3d754b6
+ __ dci(0x25303105); // whilewr p5.b, x8, x16
+ // vl128 state = 0x7fc526df
+ __ dci(0x25b4310d); // whilewr p13.s, x8, x20
+ // vl128 state = 0x5999edda
+ __ dci(0x25ac310c); // whilewr p12.s, x8, x12
+ // vl128 state = 0x46a86248
+ __ dci(0x25ac310e); // whilewr p14.s, x8, x12
+ // vl128 state = 0x0dc5ed70
+ __ dci(0x252c330a); // whilewr p10.b, x24, x12
+ // vl128 state = 0x453a1aa9
+ __ dci(0x252f330b); // whilewr p11.b, x24, x15
+ // vl128 state = 0x98fbdcdf
+ __ dci(0x256e330f); // whilewr p15.h, x24, x14
+ // vl128 state = 0x84699750
+ __ dci(0x252e334d); // whilewr p13.b, x26, x14
+ // vl128 state = 0x198ea519
+ __ dci(0x252e3349); // whilewr p9.b, x26, x14
+ // vl128 state = 0xb4956673
+ __ dci(0x253e33c1); // whilewr p1.b, x30, x30
+ // vl128 state = 0xfd88dd74
+ __ dci(0x252e33e3); // whilewr p3.b, xzr, x14
+ // vl128 state = 0x68cda9df
+ __ dci(0x25ae33cb); // whilewr p11.s, x30, x14
+ // vl128 state = 0x9104f644
+ __ dci(0x25ae33ca); // whilewr p10.s, x30, x14
+ // vl128 state = 0xd9079300
+ __ dci(0x25ea33da); // whilerw p10.d, x30, x10
+ // vl128 state = 0xd9fb019d
+ __ dci(0x25ae33d8); // whilerw p8.s, x30, x14
+ // vl128 state = 0x9edf46fa
+ __ dci(0x25ae32f9); // whilerw p9.s, x23, x14
+ // vl128 state = 0x3b10562f
+ __ dci(0x25ee32d8); // whilerw p8.d, x22, x14
+ // vl128 state = 0x473e26e3
+ __ dci(0x25ec3299); // whilerw p9.d, x20, x12
+ // vl128 state = 0x4feaf55c
+ __ dci(0x25ec329d); // whilerw p13.d, x20, x12
+ // vl128 state = 0x9f9a203a
+ __ dci(0x25e8321c); // whilerw p12.d, x16, x8
+ // vl128 state = 0xd8f32d11
+ __ dci(0x2568301d); // whilerw p13.h, x0, x8
+ // vl128 state = 0xf04b6bb8
+ __ dci(0x2528320d); // whilewr p13.b, x16, x8
+ // vl128 state = 0x0883f877
+ __ dci(0x25a8323d); // whilerw p13.s, x17, x8
+ // vl128 state = 0x9564ca3e
+ __ dci(0x25a8323f); // whilerw p15.s, x17, x8
+ // vl128 state = 0xa50cf036
+ __ dci(0x25e8303d); // whilerw p13.d, x1, x8
+ // vl128 state = 0xe89b1719
+ __ dci(0x25e83175); // whilerw p5.d, x11, x8
+ // vl128 state = 0xe79bea7c
+ __ dci(0x256a3174); // whilerw p4.h, x11, x10
+ // vl128 state = 0xc8ca3b74
+ __ dci(0x256a317c); // whilerw p12.h, x11, x10
+ // vl128 state = 0xc3c88548
+ __ dci(0x256a33f8); // whilerw p8.h, xzr, x10
+ // vl128 state = 0x8b25acc6
+ __ dci(0x256a33f0); // whilerw p0.h, xzr, x10
+ // vl128 state = 0x904c0fd1
+ __ dci(0x25e833e0); // whilewr p0.d, xzr, x8
+ // vl128 state = 0xc893f4c8
+ __ dci(0x25ec32e8); // whilewr p8.d, x23, x12
+ // vl128 state = 0x807edd46
+ __ dci(0x25ed326c); // whilewr p12.d, x19, x13
+ // vl128 state = 0x8b7c637a
+ __ dci(0x256d32ed); // whilewr p13.h, x23, x13
+ // vl128 state = 0xa3c425d3
+ __ dci(0x252d30e9); // whilewr p9.b, x7, x13
+ // vl128 state = 0x0edfe6b9
+ __ dci(0x252531eb); // whilewr p11.b, x15, x5
+ // vl128 state = 0xf716b922
+ __ dci(0x252733ef); // whilewr p15.b, xzr, x7
+ // vl128 state = 0xbf9aea3e
+ __ dci(0x25253367); // whilewr p7.b, x27, x5
+ // vl128 state = 0x357fc408
+ }
+
+ // Hash the machine state produced above and load it into w0 so the
+ // harness can compare it with the expected value for this VL.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: entry [i] corresponds to
+ // VL = (i + 1) * 128 bits, selected via the Q-register lane count below.
+ // Note entry [0] matches the final "vl128 state" value above.
+ uint32_t expected_hashes[] = {
+ 0x357fc408,
+ 0x8d6fc283,
+ 0x5f73c1df,
+ 0x2963d995,
+ 0x80713760,
+ 0x4638fc82,
+ 0x23955ead,
+ 0x52e4c002,
+ 0xd56ab65c,
+ 0x0e5bb2f2,
+ 0x8c78ec14,
+ 0xd9b634d2,
+ 0x83adc3a2,
+ 0x3b664eea,
+ 0x3d1f5422,
+ 0x7cdcd310,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random regression test for SVE2 MUL (multiply, indexed-element
+// form). Machine-generated: emits a fixed sequence of raw encodings via
+// dci() -- each annotated with its disassembly -- then hashes the resulting
+// machine state and compares it against a precomputed value for the current
+// vector length. The "vl128 state" comments record the running hash after
+// each instruction for a 128-bit VL, to help localise a divergence when
+// debugging.
+TEST_SVE(sve2_mul_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x4468fb6e); // mul z14.h, z27.h, z0.h[5]
+ // vl128 state = 0xcbe81b96
+ __ dci(0x4468f93e); // mul z30.h, z9.h, z0.h[5]
+ // vl128 state = 0x8a75362d
+ __ dci(0x4428f976); // mul z22.h, z11.h, z0.h[1]
+ // vl128 state = 0x1e3c5184
+ __ dci(0x4428fa77); // mul z23.h, z19.h, z0.h[1]
+ // vl128 state = 0x173f58b5
+ __ dci(0x4429fb67); // mul z7.h, z27.h, z1.h[1]
+ // vl128 state = 0x15686c87
+ __ dci(0x4429fb63); // mul z3.h, z27.h, z1.h[1]
+ // vl128 state = 0x41068a87
+ __ dci(0x4428fb53); // mul z19.h, z26.h, z0.h[1]
+ // vl128 state = 0xcfd6e02c
+ __ dci(0x4429fbd1); // mul z17.h, z30.h, z1.h[1]
+ // vl128 state = 0xfd3e0e3c
+ __ dci(0x442afbd9); // mul z25.h, z30.h, z2.h[1]
+ // vl128 state = 0x1e660bf7
+ __ dci(0x442afa5b); // mul z27.h, z18.h, z2.h[1]
+ // vl128 state = 0xb5378f4e
+ __ dci(0x44abfa4b); // mul z11.s, z18.s, z3.s[1]
+ // vl128 state = 0xf34416fe
+ __ dci(0x44abfa4f); // mul z15.s, z18.s, z3.s[1]
+ // vl128 state = 0xc80d6ad9
+ __ dci(0x44a9f84e); // mul z14.s, z2.s, z1.s[1]
+ // vl128 state = 0xa4fe2be7
+ __ dci(0x44e9fa46); // mul z6.d, z18.d, z9.d[0]
+ // vl128 state = 0xaf461ebb
+ __ dci(0x44e9fa8e); // mul z14.d, z20.d, z9.d[0]
+ // vl128 state = 0x9f7acd20
+ __ dci(0x44f1fa8f); // mul z15.d, z20.d, z1.d[1]
+ // vl128 state = 0x1b710469
+ __ dci(0x4471fa07); // mul z7.h, z16.h, z1.h[6]
+ // vl128 state = 0xa2120b4c
+ __ dci(0x4470fa43); // mul z3.h, z18.h, z0.h[6]
+ // vl128 state = 0xb6d6ce4c
+ __ dci(0x4474fb47); // mul z7.h, z26.h, z4.h[6]
+ // vl128 state = 0xeec634bf
+ __ dci(0x4476fa57); // mul z23.h, z18.h, z6.h[6]
+ // vl128 state = 0x893bbe37
+ __ dci(0x447cfa53); // mul z19.h, z18.h, z4.h[7]
+ // vl128 state = 0x8373940b
+ __ dci(0x447dfb52); // mul z18.h, z26.h, z5.h[7]
+ // vl128 state = 0xd1c86434
+ __ dci(0x4477fb56); // mul z22.h, z26.h, z7.h[6]
+ // vl128 state = 0xb247cf9e
+ __ dci(0x4476fb77); // mul z23.h, z27.h, z6.h[6]
+ // vl128 state = 0x6106a868
+ __ dci(0x4467fb7f); // mul z31.h, z27.h, z7.h[4]
+ // vl128 state = 0xc0a11edf
+ __ dci(0x446ffa77); // mul z23.h, z19.h, z7.h[5]
+ // vl128 state = 0xe1879a44
+ __ dci(0x442bfa76); // mul z22.h, z19.h, z3.h[1]
+ // vl128 state = 0xc773115b
+ __ dci(0x442bfa7e); // mul z30.h, z19.h, z3.h[1]
+ // vl128 state = 0x5f5b4793
+ __ dci(0x442afa2e); // mul z14.h, z17.h, z2.h[1]
+ // vl128 state = 0x144b30b2
+ __ dci(0x442afa26); // mul z6.h, z17.h, z2.h[1]
+ // vl128 state = 0x905f8608
+ __ dci(0x442afb6e); // mul z14.h, z27.h, z2.h[1]
+ // vl128 state = 0x0f826c19
+ __ dci(0x44aefb66); // mul z6.s, z27.s, z6.s[1]
+ // vl128 state = 0x7043c090
+ __ dci(0x44aefba4); // mul z4.s, z29.s, z6.s[1]
+ // vl128 state = 0xab3921a9
+ __ dci(0x44aefbb4); // mul z20.s, z29.s, z6.s[1]
+ // vl128 state = 0x7d420495
+ __ dci(0x44acfbf0); // mul z16.s, z31.s, z4.s[1]
+ // vl128 state = 0xceb17a45
+ __ dci(0x44a4fb60); // mul z0.s, z27.s, z4.s[0]
+ // vl128 state = 0x97ed0929
+ __ dci(0x44a5fb30); // mul z16.s, z25.s, z5.s[0]
+ // vl128 state = 0xb7fa54a5
+ __ dci(0x4425f938); // mul z24.h, z9.h, z5.h[0]
+ // vl128 state = 0xfcc1c192
+ __ dci(0x442df830); // mul z16.h, z1.h, z5.h[1]
+ // vl128 state = 0x933ed51d
+ __ dci(0x4427f832); // mul z18.h, z1.h, z7.h[0]
+ // vl128 state = 0x2129d4f0
+ __ dci(0x442ef822); // mul z2.h, z1.h, z6.h[1]
+ // vl128 state = 0x76f6854c
+ __ dci(0x442af803); // mul z3.h, z0.h, z2.h[1]
+ // vl128 state = 0xe763df2d
+ __ dci(0x442af801); // mul z1.h, z0.h, z2.h[1]
+ // vl128 state = 0x61db5a87
+ __ dci(0x442bf900); // mul z0.h, z8.h, z3.h[1]
+ // vl128 state = 0x90883cfb
+ __ dci(0x442bf881); // mul z1.h, z4.h, z3.h[1]
+ // vl128 state = 0xb4afb9b2
+ __ dci(0x4427f885); // mul z5.h, z4.h, z7.h[0]
+ // vl128 state = 0xe512adca
+ __ dci(0x4425f8ad); // mul z13.h, z5.h, z5.h[0]
+ // vl128 state = 0xd820475a
+ __ dci(0x4420f8a5); // mul z5.h, z5.h, z0.h[0]
+ // vl128 state = 0xea9a6f50
+ __ dci(0x4431f8a4); // mul z4.h, z5.h, z1.h[2]
+ // vl128 state = 0x9343e341
+ __ dci(0x4425f8a0); // mul z0.h, z5.h, z5.h[0]
+ // vl128 state = 0x20a5f202
+ }
+
+ // Hash the machine state produced above and load it into w0 so the
+ // harness can compare it with the expected value for this VL.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: entry [i] corresponds to
+ // VL = (i + 1) * 128 bits, selected via the Q-register lane count below.
+ // Note entry [0] matches the final "vl128 state" value above.
+ uint32_t expected_hashes[] = {
+ 0x20a5f202,
+ 0xdb7b10ee,
+ 0x0607441b,
+ 0x4966f0ff,
+ 0x5f750338,
+ 0x9be09ff4,
+ 0x8805a320,
+ 0x52cf70b0,
+ 0x5f4c6d92,
+ 0xf8009f1f,
+ 0x56cd1ff6,
+ 0x345f063d,
+ 0x3807ccf3,
+ 0xf7eb85a8,
+ 0x1600c143,
+ 0x97be6c01,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random regression test for SVE2 MLA/MLS (multiply-accumulate /
+// multiply-subtract, indexed-element forms). Machine-generated: emits a
+// fixed sequence of raw encodings via dci() -- each annotated with its
+// disassembly -- then hashes the resulting machine state and compares it
+// against a precomputed value for the current vector length. The
+// "vl128 state" comments record the running hash after each instruction for
+// a 128-bit VL, to help localise a divergence when debugging.
+TEST_SVE(sve2_mla_mls_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44200800); // mla z0.h, z0.h, z0.h[0]
+ // vl128 state = 0x06aac22e
+ __ dci(0x44200a28); // mla z8.h, z17.h, z0.h[0]
+ // vl128 state = 0xde2255a4
+ __ dci(0x44e00a2a); // mla z10.d, z17.d, z0.d[0]
+ // vl128 state = 0x9bf1bae6
+ __ dci(0x44600e3a); // mls z26.h, z17.h, z0.h[4]
+ // vl128 state = 0x28b58feb
+ __ dci(0x44e20e2a); // mls z10.d, z17.d, z2.d[0]
+ // vl128 state = 0x0ac8fcc8
+ __ dci(0x44620f2e); // mls z14.h, z25.h, z2.h[4]
+ // vl128 state = 0x955da860
+ __ dci(0x44630f6a); // mls z10.h, z27.h, z3.h[4]
+ // vl128 state = 0x654ee915
+ __ dci(0x44730b6e); // mla z14.h, z27.h, z3.h[6]
+ // vl128 state = 0x3fd3e02c
+ __ dci(0x44720f6f); // mls z15.h, z27.h, z2.h[6]
+ // vl128 state = 0x46031098
+ __ dci(0x44620f4b); // mls z11.h, z26.h, z2.h[4]
+ // vl128 state = 0xd49183cf
+ __ dci(0x446a0b5b); // mla z27.h, z26.h, z2.h[5]
+ // vl128 state = 0x4fe290c1
+ __ dci(0x44680b73); // mla z19.h, z27.h, z0.h[5]
+ // vl128 state = 0xf6fccd86
+ __ dci(0x44e90b77); // mla z23.d, z27.d, z9.d[0]
+ // vl128 state = 0x57b2090d
+ __ dci(0x44f10b76); // mla z22.d, z27.d, z1.d[1]
+ // vl128 state = 0x5a6932eb
+ __ dci(0x44f40b77); // mla z23.d, z27.d, z4.d[1]
+ // vl128 state = 0x8e33d7d5
+ __ dci(0x44640b7f); // mla z31.h, z27.h, z4.h[4]
+ // vl128 state = 0xaa01885d
+ __ dci(0x44640b7d); // mla z29.h, z27.h, z4.h[4]
+ // vl128 state = 0x2ef00e60
+ __ dci(0x44640b7f); // mla z31.h, z27.h, z4.h[4]
+ // vl128 state = 0x94ac10d3
+ __ dci(0x44340b7e); // mla z30.h, z27.h, z4.h[2]
+ // vl128 state = 0x48211118
+ __ dci(0x44340e7a); // mls z26.h, z19.h, z4.h[2]
+ // vl128 state = 0x72cc2767
+ __ dci(0x44b40eea); // mls z10.s, z23.s, z4.s[2]
+ // vl128 state = 0x3855f70f
+ __ dci(0x44e40ee2); // mls z2.d, z23.d, z4.d[0]
+ // vl128 state = 0xf9225160
+ __ dci(0x44ec0ea3); // mls z3.d, z21.d, z12.d[0]
+ // vl128 state = 0xf9b94fd0
+ __ dci(0x44ae0ea7); // mls z7.s, z21.s, z6.s[1]
+ // vl128 state = 0x06070917
+ __ dci(0x44ae0eb7); // mls z23.s, z21.s, z6.s[1]
+ // vl128 state = 0x26ecdd18
+ __ dci(0x44ae0e07); // mls z7.s, z16.s, z6.s[1]
+ // vl128 state = 0xaa8e3a32
+ __ dci(0x44ae0a85); // mla z5.s, z20.s, z6.s[1]
+ // vl128 state = 0x2379cba0
+ __ dci(0x44ae0a81); // mla z1.s, z20.s, z6.s[1]
+ // vl128 state = 0x3cc8a61c
+ __ dci(0x442a0a85); // mla z5.h, z20.h, z2.h[1]
+ // vl128 state = 0x96f118ef
+ __ dci(0x443e0a84); // mla z4.h, z20.h, z6.h[3]
+ // vl128 state = 0xa3f8cb41
+ __ dci(0x443f0b8c); // mla z12.h, z28.h, z7.h[3]
+ // vl128 state = 0x97fcb1da
+ __ dci(0x442f0bbc); // mla z28.h, z29.h, z7.h[1]
+ // vl128 state = 0x761e9499
+ __ dci(0x44270fac); // mls z12.h, z29.h, z7.h[0]
+ // vl128 state = 0xfb28f943
+ __ dci(0x442f0ead); // mls z13.h, z21.h, z7.h[1]
+ // vl128 state = 0x387a2623
+ __ dci(0x44270fa9); // mls z9.h, z29.h, z7.h[0]
+ // vl128 state = 0x22f03847
+ __ dci(0x44270f68); // mls z8.h, z27.h, z7.h[0]
+ // vl128 state = 0xada4998b
+ __ dci(0x44270f6c); // mls z12.h, z27.h, z7.h[0]
+ // vl128 state = 0xdf80a034
+ __ dci(0x44270f7c); // mls z28.h, z27.h, z7.h[0]
+ // vl128 state = 0x3ccddaa6
+ __ dci(0x44250f2c); // mls z12.h, z25.h, z5.h[0]
+ // vl128 state = 0x588502cb
+ __ dci(0x442f0f28); // mls z8.h, z25.h, z7.h[1]
+ // vl128 state = 0x79c90307
+ __ dci(0x446f0d2c); // mls z12.h, z9.h, z7.h[5]
+ // vl128 state = 0xaa0b21a9
+ __ dci(0x44af0d2e); // mls z14.s, z9.s, z7.s[1]
+ // vl128 state = 0xd5ccc60c
+ __ dci(0x44ed0d26); // mls z6.d, z9.d, z13.d[0]
+ // vl128 state = 0x15037cbe
+ __ dci(0x44fd0f2e); // mls z14.d, z25.d, z13.d[1]
+ // vl128 state = 0x9f481fdf
+ __ dci(0x44f90e2f); // mls z15.d, z17.d, z9.d[1]
+ // vl128 state = 0x93fe8537
+ __ dci(0x447d0e3f); // mls z31.h, z17.h, z5.h[7]
+ // vl128 state = 0x14b9edf2
+ __ dci(0x44f90e2f); // mls z15.d, z17.d, z9.d[1]
+ // vl128 state = 0xde1c0d1c
+ __ dci(0x44790c27); // mls z7.h, z1.h, z1.h[7]
+ // vl128 state = 0x563d614a
+ __ dci(0x44790c23); // mls z3.h, z1.h, z1.h[7]
+ // vl128 state = 0x8c6d9baf
+ __ dci(0x44f90c6b); // mls z11.d, z3.d, z9.d[1]
+ // vl128 state = 0x1a25c073
+ }
+
+ // Hash the machine state produced above and load it into w0 so the
+ // harness can compare it with the expected value for this VL.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: entry [i] corresponds to
+ // VL = (i + 1) * 128 bits, selected via the Q-register lane count below.
+ // Note entry [0] matches the final "vl128 state" value above.
+ uint32_t expected_hashes[] = {
+ 0x1a25c073,
+ 0xfbb2c945,
+ 0x932b8ab7,
+ 0x99370bee,
+ 0x44a15f80,
+ 0xae898f1d,
+ 0x97382827,
+ 0xafec059e,
+ 0xf11bc007,
+ 0x34c49b30,
+ 0x73b95606,
+ 0x77324772,
+ 0x9ad7d21b,
+ 0x0d0958a7,
+ 0xee4accc3,
+ 0x31d34df8,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random regression test for the SVE2 widening (long)
+// multiply-accumulate family: smlalb/smlalt, umlalb/umlalt, smlslb/smlslt,
+// umlslb/umlslt. Machine-generated: emits a fixed sequence of raw encodings
+// via dci() -- each annotated with its disassembly -- then hashes the
+// resulting machine state and compares it against a precomputed value for
+// the current vector length. The "vl128 state" comments record the running
+// hash after each instruction for a 128-bit VL, to help localise a
+// divergence when debugging.
+TEST_SVE(sve2_mla_long) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44935abe); // umlslb z30.s, z21.h, z19.h
+ // vl128 state = 0x4fac8e49
+ __ dci(0x449358fa); // umlslb z26.s, z7.h, z19.h
+ // vl128 state = 0xca971f04
+ __ dci(0x44935adb); // umlslb z27.s, z22.h, z19.h
+ // vl128 state = 0x5652564b
+ __ dci(0x449359da); // umlslb z26.s, z14.h, z19.h
+ // vl128 state = 0xf2d81244
+ __ dci(0x448349de); // umlalb z30.s, z14.h, z3.h
+ // vl128 state = 0x7cbaa548
+ __ dci(0x448349d6); // umlalb z22.s, z14.h, z3.h
+ // vl128 state = 0x9e7b4915
+ __ dci(0x44c34952); // umlalb z18.d, z10.s, z3.s
+ // vl128 state = 0x550af70e
+ __ dci(0x44d349d3); // umlalb z19.d, z14.s, z19.s
+ // vl128 state = 0x676743b2
+ __ dci(0x44d549d7); // umlalb z23.d, z14.s, z21.s
+ // vl128 state = 0x602e09e4
+ __ dci(0x44d55ddf); // umlslt z31.d, z14.s, z21.s
+ // vl128 state = 0xd4c245de
+ __ dci(0x44d55d1b); // umlslt z27.d, z8.s, z21.s
+ // vl128 state = 0x9c2c1cb4
+ __ dci(0x44d5490b); // umlalb z11.d, z8.s, z21.s
+ // vl128 state = 0x8a702002
+ __ dci(0x44554d0a); // umlalt z10.h, z8.b, z21.b
+ // vl128 state = 0x6758ce3c
+ __ dci(0x4455452b); // smlalt z11.h, z9.b, z21.b
+ // vl128 state = 0x967e596e
+ __ dci(0x44554529); // smlalt z9.h, z9.b, z21.b
+ // vl128 state = 0x1300909a
+ __ dci(0x44474521); // smlalt z1.h, z9.b, z7.b
+ // vl128 state = 0x01ca26c1
+ __ dci(0x44c74d25); // umlalt z5.d, z9.s, z7.s
+ // vl128 state = 0x8e6313b9
+ __ dci(0x44cb4d24); // umlalt z4.d, z9.s, z11.s
+ // vl128 state = 0xdb41e004
+ __ dci(0x44cb4d2c); // umlalt z12.d, z9.s, z11.s
+ // vl128 state = 0x941401ca
+ __ dci(0x44c94da8); // umlalt z8.d, z13.s, z9.s
+ // vl128 state = 0x8a57334b
+ __ dci(0x44594db8); // umlalt z24.h, z13.b, z25.b
+ // vl128 state = 0x94333fae
+ __ dci(0x44585db0); // umlslt z16.h, z13.b, z24.b
+ // vl128 state = 0xf4fbe251
+ __ dci(0x44585f80); // umlslt z0.h, z28.b, z24.b
+ // vl128 state = 0x1f5aeef3
+ __ dci(0x445a5fc2); // umlslt z2.h, z30.b, z26.b
+ // vl128 state = 0x4b153d20
+ __ dci(0x445a5fd2); // umlslt z18.h, z30.b, z26.b
+ // vl128 state = 0xbd82f0a2
+ __ dci(0x445a5fd3); // umlslt z19.h, z30.b, z26.b
+ // vl128 state = 0x72d7083d
+ __ dci(0x44525bd2); // umlslb z18.h, z30.b, z18.b
+ // vl128 state = 0x5018a138
+ __ dci(0x44525bd6); // umlslb z22.h, z30.b, z18.b
+ // vl128 state = 0xcaf48a01
+ __ dci(0x445053d2); // smlslb z18.h, z30.b, z16.b
+ // vl128 state = 0x76e2d850
+ __ dci(0x44d153c2); // smlslb z2.d, z30.s, z17.s
+ // vl128 state = 0x8594d6c9
+ __ dci(0x449353c3); // smlslb z3.s, z30.h, z19.h
+ // vl128 state = 0x8e0da89d
+ __ dci(0x449152c7); // smlslb z7.s, z22.h, z17.h
+ // vl128 state = 0xe7d08864
+ __ dci(0x44995285); // smlslb z5.s, z20.h, z25.h
+ // vl128 state = 0xd7c49fca
+ __ dci(0x449953c1); // smlslb z1.s, z30.h, z25.h
+ // vl128 state = 0x3b648b39
+ __ dci(0x449152c9); // smlslb z9.s, z22.h, z17.h
+ // vl128 state = 0x5b5bab94
+ __ dci(0x449542cd); // smlalb z13.s, z22.h, z21.h
+ // vl128 state = 0x65282d76
+ __ dci(0x449c42c9); // smlalb z9.s, z22.h, z28.h
+ // vl128 state = 0x94a92486
+ __ dci(0x449c52f9); // smlslb z25.s, z23.h, z28.h
+ // vl128 state = 0xd4f62835
+ __ dci(0x44dc5afd); // umlslb z29.d, z23.s, z28.s
+ // vl128 state = 0xf124c6a1
+ __ dci(0x44dd58ff); // umlslb z31.d, z7.s, z29.s
+ // vl128 state = 0xbc694f1c
+ __ dci(0x44dc587b); // umlslb z27.d, z3.s, z28.s
+ // vl128 state = 0xf1621eb2
+ __ dci(0x44de596b); // umlslb z11.d, z11.s, z30.s
+ // vl128 state = 0x944b4b75
+ __ dci(0x44de5969); // umlslb z9.d, z11.s, z30.s
+ // vl128 state = 0xa98a2c38
+ __ dci(0x44db596d); // umlslb z13.d, z11.s, z27.s
+ // vl128 state = 0x6bd60807
+ __ dci(0x44db5d5d); // umlslt z29.d, z10.s, z27.s
+ // vl128 state = 0x9c377b51
+ __ dci(0x449b555f); // smlslt z31.s, z10.h, z27.h
+ // vl128 state = 0x7c81f1d5
+ __ dci(0x449b555d); // smlslt z29.s, z10.h, z27.h
+ // vl128 state = 0xdaab1edb
+ __ dci(0x44d35559); // smlslt z25.d, z10.s, z19.s
+ // vl128 state = 0xdc3f25f1
+ __ dci(0x44d355f8); // smlslt z24.d, z15.s, z19.s
+ // vl128 state = 0x9c75a3cf
+ __ dci(0x44d356f9); // smlslt z25.d, z23.s, z19.s
+ // vl128 state = 0x5b999178
+ }
+
+ // Hash the machine state produced above and load it into w0 so the
+ // harness can compare it with the expected value for this VL.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: entry [i] corresponds to
+ // VL = (i + 1) * 128 bits, selected via the Q-register lane count below.
+ // Note entry [0] matches the final "vl128 state" value above.
+ uint32_t expected_hashes[] = {
+ 0x5b999178,
+ 0xd6191e64,
+ 0x1f3bd2a1,
+ 0x1e0ac282,
+ 0x8d13f5d3,
+ 0x97157e8f,
+ 0x5d6e4134,
+ 0x8d2186b4,
+ 0x88078c65,
+ 0x6dd92db3,
+ 0xfcd02d21,
+ 0x81738dc2,
+ 0x644e3c06,
+ 0x9c9d2ac8,
+ 0xaaa43548,
+ 0x871e9b08,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 complex integer multiply-add (vector
+// form: CMLA / SQRDCMLAH). A fixed stream of raw encodings is emitted via
+// dci(); each is annotated with its disassembly and the running vl128 state
+// hash. Afterwards a hash of the whole machine state is compared against one
+// reference value per SVE vector length.
+TEST_SVE(sve2_complex_integer_multiply_add_vector) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
+ __ dci(0x44dd2f34); // cmla z20.d, z25.d, z29.d, #270
+ // vl128 state = 0x12e9bd68
+ __ dci(0x44dd2f3c); // cmla z28.d, z25.d, z29.d, #270
+ // vl128 state = 0x4fd8ba3e
+ __ dci(0x44dc2734); // cmla z20.d, z25.d, z28.d, #90
+ // vl128 state = 0x9b11d64f
+ __ dci(0x44dc2e36); // cmla z22.d, z17.d, z28.d, #270
+ // vl128 state = 0x4658e6ae
+ __ dci(0x44dd2f34); // cmla z20.d, z25.d, z29.d, #270
+ // vl128 state = 0x5151ea16
+ __ dci(0x44dc2fb5); // cmla z21.d, z29.d, z28.d, #270
+ // vl128 state = 0x21c497cc
+ __ dci(0x44dc2fbd); // cmla z29.d, z29.d, z28.d, #270
+ // vl128 state = 0xe823fd46
+ __ dci(0x44dc2e3c); // cmla z28.d, z17.d, z28.d, #270
+ // vl128 state = 0xcc35cda6
+ __ dci(0x44dc2e34); // cmla z20.d, z17.d, z28.d, #270
+ // vl128 state = 0x963047c0
+ __ dci(0x44d42c30); // cmla z16.d, z1.d, z20.d, #270
+ // vl128 state = 0x5d2c5643
+ __ dci(0x44c42c60); // cmla z0.d, z3.d, z4.d, #270
+ // vl128 state = 0xfd400169
+ __ dci(0x44842464); // cmla z4.s, z3.s, z4.s, #90
+ // vl128 state = 0x00116098
+ __ dci(0x44842d60); // cmla z0.s, z11.s, z4.s, #270
+ // vl128 state = 0x582d46e3
+ __ dci(0x44042562); // cmla z2.b, z11.b, z4.b, #90
+ // vl128 state = 0x1bd70bf0
+ __ dci(0x44042420); // cmla z0.b, z1.b, z4.b, #90
+ // vl128 state = 0x7682807d
+ __ dci(0x44062401); // cmla z1.b, z0.b, z6.b, #90
+ // vl128 state = 0xaa3e2c64
+ __ dci(0x44042449); // cmla z9.b, z2.b, z4.b, #90
+ // vl128 state = 0xd81638f9
+ __ dci(0x44052059); // cmla z25.b, z2.b, z5.b, #0
+ // vl128 state = 0x38cb5d96
+ __ dci(0x4415305d); // sqrdcmlah z29.b, z2.b, z21.b, #0
+ // vl128 state = 0x4c6b85e0
+ __ dci(0x44153819); // sqrdcmlah z25.b, z0.b, z21.b, #180
+ // vl128 state = 0x229b5be9
+ __ dci(0x4405391b); // sqrdcmlah z27.b, z8.b, z5.b, #180
+ // vl128 state = 0x82611aec
+ __ dci(0x4405314b); // sqrdcmlah z11.b, z10.b, z5.b, #0
+ // vl128 state = 0xe58c48e0
+ __ dci(0x4407316a); // sqrdcmlah z10.b, z11.b, z7.b, #0
+ // vl128 state = 0x5282838a
+ __ dci(0x4407347a); // sqrdcmlah z26.b, z3.b, z7.b, #90
+ // vl128 state = 0x134a0891
+ __ dci(0x4413347e); // sqrdcmlah z30.b, z3.b, z19.b, #90
+ // vl128 state = 0x455ab9e0
+ __ dci(0x4443347f); // sqrdcmlah z31.h, z3.h, z3.h, #90
+ // vl128 state = 0x030d9d2c
+ __ dci(0x444b307e); // sqrdcmlah z30.h, z3.h, z11.h, #0
+ // vl128 state = 0x91a95a2c
+ __ dci(0x444b301f); // sqrdcmlah z31.h, z0.h, z11.h, #0
+ // vl128 state = 0x0f1c8468
+ __ dci(0x4409300f); // sqrdcmlah z15.b, z0.b, z9.b, #0
+ // vl128 state = 0x95f802b7
+ __ dci(0x440c300e); // sqrdcmlah z14.b, z0.b, z12.b, #0
+ // vl128 state = 0x5fa6d2c6
+ __ dci(0x4404310c); // sqrdcmlah z12.b, z8.b, z4.b, #0
+ // vl128 state = 0x192b05a4
+ __ dci(0x4415310d); // sqrdcmlah z13.b, z8.b, z21.b, #0
+ // vl128 state = 0xa8a8d37f
+ __ dci(0x4414350f); // sqrdcmlah z15.b, z8.b, z20.b, #90
+ // vl128 state = 0xcd890d8c
+ __ dci(0x4454354d); // sqrdcmlah z13.h, z10.h, z20.h, #90
+ // vl128 state = 0x91ab863e
+ __ dci(0x444435c5); // sqrdcmlah z5.h, z14.h, z4.h, #90
+ // vl128 state = 0x41bbc90c
+ __ dci(0x444c34c7); // sqrdcmlah z7.h, z6.h, z12.h, #90
+ // vl128 state = 0xb6329344
+ __ dci(0x444836c6); // sqrdcmlah z6.h, z22.h, z8.h, #90
+ // vl128 state = 0xdf5f443c
+ __ dci(0x444836d6); // sqrdcmlah z22.h, z22.h, z8.h, #90
+ // vl128 state = 0x719a2e70
+ __ dci(0x44403694); // sqrdcmlah z20.h, z20.h, z0.h, #90
+ // vl128 state = 0x28a64934
+ __ dci(0x4449369c); // sqrdcmlah z28.h, z20.h, z9.h, #90
+ // vl128 state = 0x5d41ba84
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0x5d41ba84,
+ 0xd5e52f4d,
+ 0x9f627c0d,
+ 0x111f21a7,
+ 0x5d7b356e,
+ 0x1f345c0e,
+ 0xd881296e,
+ 0x819f9091,
+ 0x59823550,
+ 0xbe2162c7,
+ 0x5f5dca40,
+ 0xad7e429e,
+ 0x4f66661f,
+ 0x7c5fbca0,
+ 0x819ff997,
+ 0x68ebdb56,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 complex integer multiply-add (indexed
+// form: SQRDCMLAH with element index). Emits a fixed encoding stream via
+// dci() and compares a machine-state hash against per-vector-length
+// reference values.
+TEST_SVE(sve2_complex_integer_multiply_add_indexed) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+ __ dci(0x44fd7d52); // sqrdcmlah z18.s, z10.s, z13.s[1], #270
+ // vl128 state = 0x5c66baad
+ __ dci(0x44fd7c13); // sqrdcmlah z19.s, z0.s, z13.s[1], #270
+ // vl128 state = 0xac8c451b
+ __ dci(0x44f97e11); // sqrdcmlah z17.s, z16.s, z9.s[1], #270
+ // vl128 state = 0x02ebccdb
+ __ dci(0x44e97615); // sqrdcmlah z21.s, z16.s, z9.s[0], #90
+ // vl128 state = 0xe43b1032
+ __ dci(0x44e97614); // sqrdcmlah z20.s, z16.s, z9.s[0], #90
+ // vl128 state = 0xa28d9898
+ __ dci(0x44e17635); // sqrdcmlah z21.s, z17.s, z1.s[0], #90
+ // vl128 state = 0x021764c6
+ __ dci(0x44e17634); // sqrdcmlah z20.s, z17.s, z1.s[0], #90
+ // vl128 state = 0x812dbf22
+ __ dci(0x44f07635); // sqrdcmlah z21.s, z17.s, z0.s[1], #90
+ // vl128 state = 0x5e87a59e
+ __ dci(0x44f07465); // sqrdcmlah z5.s, z3.s, z0.s[1], #90
+ // vl128 state = 0xd1a78d9d
+ __ dci(0x44f87675); // sqrdcmlah z21.s, z19.s, z8.s[1], #90
+ // vl128 state = 0xd4500975
+ __ dci(0x44b87e7d); // sqrdcmlah z29.h, z19.h, z0.h[3], #270
+ // vl128 state = 0x765230ab
+ __ dci(0x44b876f9); // sqrdcmlah z25.h, z23.h, z0.h[3], #90
+ // vl128 state = 0xca9c5bb4
+ __ dci(0x44f874fb); // sqrdcmlah z27.s, z7.s, z8.s[1], #90
+ // vl128 state = 0xa4bc044a
+ __ dci(0x44f070fa); // sqrdcmlah z26.s, z7.s, z0.s[1], #0
+ // vl128 state = 0xd0eaa1df
+ __ dci(0x44f07038); // sqrdcmlah z24.s, z1.s, z0.s[1], #0
+ // vl128 state = 0x80836f9f
+ __ dci(0x44b17030); // sqrdcmlah z16.h, z1.h, z1.h[2], #0
+ // vl128 state = 0x59ffa1ce
+ __ dci(0x44b17032); // sqrdcmlah z18.h, z1.h, z1.h[2], #0
+ // vl128 state = 0xdb8beca5
+ __ dci(0x44b07430); // sqrdcmlah z16.h, z1.h, z0.h[2], #90
+ // vl128 state = 0xe5b6a0e3
+ __ dci(0x44b07438); // sqrdcmlah z24.h, z1.h, z0.h[2], #90
+ // vl128 state = 0x19cc8c20
+ __ dci(0x44b0743a); // sqrdcmlah z26.h, z1.h, z0.h[2], #90
+ // vl128 state = 0x19c819af
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0x19c819af,
+ 0xbb2225f2,
+ 0x7e54f513,
+ 0xdcbf6f0f,
+ 0x2bfdc97d,
+ 0x48890c54,
+ 0x65542c02,
+ 0xaef6b224,
+ 0x993b14fd,
+ 0x244d27c5,
+ 0xe8767ba8,
+ 0x4397a148,
+ 0xb3efcd2e,
+ 0xb5894aba,
+ 0x2a0f6f7a,
+ 0xbe45142c,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 saturating multiply-add long (vector
+// form: SQDMLALB/SQDMLALT/SQDMLSLB/SQDMLSLT). Emits a fixed encoding stream
+// via dci() and compares a machine-state hash against per-vector-length
+// reference values.
+TEST_SVE(sve2_saturating_multiply_add_long_vector) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
+ __ dci(0x44db629b); // sqdmlalb z27.d, z20.s, z27.s
+ // vl128 state = 0x61e408e4
+ __ dci(0x44db631f); // sqdmlalb z31.d, z24.s, z27.s
+ // vl128 state = 0xf146813f
+ __ dci(0x44da6b1d); // sqdmlslb z29.d, z24.s, z26.s
+ // vl128 state = 0xb8d07371
+ __ dci(0x44da6a35); // sqdmlslb z21.d, z17.s, z26.s
+ // vl128 state = 0xaf43cc88
+ __ dci(0x444a6a3d); // sqdmlslb z29.h, z17.b, z10.b
+ // vl128 state = 0xba4c5067
+ __ dci(0x444a6a39); // sqdmlslb z25.h, z17.b, z10.b
+ // vl128 state = 0x396202c3
+ __ dci(0x445a6829); // sqdmlslb z9.h, z1.b, z26.b
+ // vl128 state = 0x22095f7f
+ __ dci(0x445a6b28); // sqdmlslb z8.h, z25.b, z26.b
+ // vl128 state = 0xa9516b4b
+ __ dci(0x44da6b69); // sqdmlslb z9.d, z27.s, z26.s
+ // vl128 state = 0x1f048226
+ __ dci(0x44da616d); // sqdmlalb z13.d, z11.s, z26.s
+ // vl128 state = 0x0fdd982f
+ __ dci(0x4458616f); // sqdmlalb z15.h, z11.b, z24.b
+ // vl128 state = 0x461ba137
+ __ dci(0x4449617f); // sqdmlalb z31.h, z11.b, z9.b
+ // vl128 state = 0xd1071b0c
+ __ dci(0x4459614f); // sqdmlalb z15.h, z10.b, z25.b
+ // vl128 state = 0x0fa6bae7
+ __ dci(0x4458654d); // sqdmlalt z13.h, z10.b, z24.b
+ // vl128 state = 0xebd08a80
+ __ dci(0x44586d05); // sqdmlslt z5.h, z8.b, z24.b
+ // vl128 state = 0xd4c41665
+ __ dci(0x44506d84); // sqdmlslt z4.h, z12.b, z16.b
+ // vl128 state = 0x80f619f9
+ __ dci(0x44506fc6); // sqdmlslt z6.h, z30.b, z16.b
+ // vl128 state = 0xb588af21
+ __ dci(0x44566fc4); // sqdmlslt z4.h, z30.b, z22.b
+ // vl128 state = 0x4dd8437a
+ __ dci(0x44566f0c); // sqdmlslt z12.h, z24.b, z22.b
+ // vl128 state = 0x48ca6e5c
+ __ dci(0x44566f0e); // sqdmlslt z14.h, z24.b, z22.b
+ // vl128 state = 0x02d6f977
+ __ dci(0x44566746); // sqdmlalt z6.h, z26.b, z22.b
+ // vl128 state = 0x179f59f4
+ __ dci(0x445767c4); // sqdmlalt z4.h, z30.b, z23.b
+ // vl128 state = 0xf2d2823c
+ __ dci(0x44d667c0); // sqdmlalt z0.d, z30.s, z22.s
+ // vl128 state = 0x404c277e
+ __ dci(0x44566742); // sqdmlalt z2.h, z26.b, z22.b
+ // vl128 state = 0x986a72c1
+ __ dci(0x44c6674a); // sqdmlalt z10.d, z26.s, z6.s
+ // vl128 state = 0xbb8044ab
+ __ dci(0x44c66742); // sqdmlalt z2.d, z26.s, z6.s
+ // vl128 state = 0x9f5b244b
+ __ dci(0x44ce6706); // sqdmlalt z6.d, z24.s, z14.s
+ // vl128 state = 0xc6ce6266
+ __ dci(0x44ce670e); // sqdmlalt z14.d, z24.s, z14.s
+ // vl128 state = 0xc9e1a461
+ __ dci(0x44de6746); // sqdmlalt z6.d, z26.s, z30.s
+ // vl128 state = 0x9f133504
+ __ dci(0x44dc6342); // sqdmlalb z2.d, z26.s, z28.s
+ // vl128 state = 0x42deb468
+ __ dci(0x44d46366); // sqdmlalb z6.d, z27.s, z20.s
+ // vl128 state = 0xb3436cd4
+ __ dci(0x44d5626e); // sqdmlalb z14.d, z19.s, z21.s
+ // vl128 state = 0x0e0533ac
+ __ dci(0x44d5646f); // sqdmlalt z15.d, z3.s, z21.s
+ // vl128 state = 0x92d04e7b
+ __ dci(0x44d36467); // sqdmlalt z7.d, z3.s, z19.s
+ // vl128 state = 0xd9fa8b4d
+ __ dci(0x44d360ef); // sqdmlalb z15.d, z7.s, z19.s
+ // vl128 state = 0x9c9a5778
+ __ dci(0x44d3646b); // sqdmlalt z11.d, z3.s, z19.s
+ // vl128 state = 0x40d7c923
+ __ dci(0x4492646f); // sqdmlalt z15.s, z3.h, z18.h
+ // vl128 state = 0x0b5b2334
+ __ dci(0x4492647f); // sqdmlalt z31.s, z3.h, z18.h
+ // vl128 state = 0xfe6302c1
+ __ dci(0x4494647d); // sqdmlalt z29.s, z3.h, z20.h
+ // vl128 state = 0xe3c05a37
+ __ dci(0x4484666d); // sqdmlalt z13.s, z19.h, z4.h
+ // vl128 state = 0x15169e94
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0x15169e94,
+ 0x6101102c,
+ 0xa5586d26,
+ 0x3fbf4f9f,
+ 0x8e62994d,
+ 0x4d77a9e5,
+ 0x4ceadc9e,
+ 0x8247db61,
+ 0x4aa10859,
+ 0x0b3280b3,
+ 0x015d75ea,
+ 0x1cf4825e,
+ 0xda7d3fea,
+ 0xc24bd624,
+ 0x60ee565a,
+ 0x7ac92c39,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 saturating multiply-add interleaved
+// long (SQDMLALBT / SQDMLSLBT). Emits a fixed encoding stream via dci() and
+// compares a machine-state hash against per-vector-length reference values.
+TEST_SVE(sve2_saturating_multiply_add_interleaved_long) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x449e0ac6); // sqdmlalbt z6.s, z22.h, z30.h
+ // vl128 state = 0x1f0ef37c
+ __ dci(0x449c0ae4); // sqdmlalbt z4.s, z23.h, z28.h
+ // vl128 state = 0xa80bf2c8
+ __ dci(0x449c0ae6); // sqdmlalbt z6.s, z23.h, z28.h
+ // vl128 state = 0x4c5b0e8f
+ __ dci(0x449e0aae); // sqdmlalbt z14.s, z21.h, z30.h
+ // vl128 state = 0xa6482041
+ __ dci(0x449e0aaf); // sqdmlalbt z15.s, z21.h, z30.h
+ // vl128 state = 0x6ef82b7a
+ __ dci(0x449c0a2b); // sqdmlalbt z11.s, z17.h, z28.h
+ // vl128 state = 0x0070a7fa
+ __ dci(0x449e0829); // sqdmlalbt z9.s, z1.h, z30.h
+ // vl128 state = 0x08b9efc6
+ __ dci(0x449e0c61); // sqdmlslbt z1.s, z3.h, z30.h
+ // vl128 state = 0xebd25c16
+ __ dci(0x449e0c60); // sqdmlslbt z0.s, z3.h, z30.h
+ // vl128 state = 0x0926abbe
+ __ dci(0x449e0c70); // sqdmlslbt z16.s, z3.h, z30.h
+ // vl128 state = 0xe9d3e5a7
+ __ dci(0x449f0cf4); // sqdmlslbt z20.s, z7.h, z31.h
+ // vl128 state = 0xf062523d
+ __ dci(0x449f08b5); // sqdmlalbt z21.s, z5.h, z31.h
+ // vl128 state = 0x6034c14e
+ __ dci(0x449f08a5); // sqdmlalbt z5.s, z5.h, z31.h
+ // vl128 state = 0x0a73c74b
+ __ dci(0x448e08b5); // sqdmlalbt z21.s, z5.h, z14.h
+ // vl128 state = 0xa4af2700
+ __ dci(0x448c08e5); // sqdmlalbt z5.s, z7.h, z12.h
+ // vl128 state = 0x7499c587
+ __ dci(0x448c08e1); // sqdmlalbt z1.s, z7.h, z12.h
+ // vl128 state = 0x968bca0e
+ __ dci(0x448c0971); // sqdmlalbt z17.s, z11.h, z12.h
+ // vl128 state = 0xd7890449
+ __ dci(0x448f0975); // sqdmlalbt z21.s, z11.h, z15.h
+ // vl128 state = 0xa2393863
+ __ dci(0x448f0977); // sqdmlalbt z23.s, z11.h, z15.h
+ // vl128 state = 0x0f7d9688
+ __ dci(0x449f093f); // sqdmlalbt z31.s, z9.h, z31.h
+ // vl128 state = 0xeb16ca99
+ __ dci(0x449f09f7); // sqdmlalbt z23.s, z15.h, z31.h
+ // vl128 state = 0x5eca8b00
+ __ dci(0x449f0987); // sqdmlalbt z7.s, z12.h, z31.h
+ // vl128 state = 0xf8f22744
+ __ dci(0x449f0a83); // sqdmlalbt z3.s, z20.h, z31.h
+ // vl128 state = 0xc20d54f5
+ __ dci(0x449b0ac1); // sqdmlalbt z1.s, z22.h, z27.h
+ // vl128 state = 0xf371a13b
+ __ dci(0x449b0aa9); // sqdmlalbt z9.s, z21.h, z27.h
+ // vl128 state = 0xffae55ce
+ __ dci(0x449b0ab9); // sqdmlalbt z25.s, z21.h, z27.h
+ // vl128 state = 0x0c5ab866
+ __ dci(0x44d30aa9); // sqdmlalbt z9.d, z21.s, z19.s
+ // vl128 state = 0x388bfe27
+ __ dci(0x44d30aab); // sqdmlalbt z11.d, z21.s, z19.s
+ // vl128 state = 0x6dc15ec8
+ __ dci(0x44d70baf); // sqdmlalbt z15.d, z29.s, z23.s
+ // vl128 state = 0x6a858021
+ __ dci(0x44d70ba7); // sqdmlalbt z7.d, z29.s, z23.s
+ // vl128 state = 0x52416517
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0x52416517,
+ 0x1a625e10,
+ 0x3eaaa30f,
+ 0x0eefe820,
+ 0x9e2f7744,
+ 0x3dbc3206,
+ 0xca85b926,
+ 0x9428c809,
+ 0x7c35818c,
+ 0xb8bc3648,
+ 0x5b215c50,
+ 0xbdb56ba5,
+ 0xe4e4bc54,
+ 0x69ba132f,
+ 0xa498b17a,
+ 0xf482b2a6,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 saturating multiply-add long (indexed
+// form: SQDMLALB/SQDMLALT/SQDMLSLB/SQDMLSLT with element index). Emits a
+// fixed encoding stream via dci() and compares a machine-state hash against
+// per-vector-length reference values.
+TEST_SVE(sve2_saturating_multiply_add_long_indexed) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44f52e3d); // sqdmlalt z29.d, z17.s, z5.s[3]
+ // vl128 state = 0x2a284ede
+ __ dci(0x44f52e3c); // sqdmlalt z28.d, z17.s, z5.s[3]
+ // vl128 state = 0x48a615e9
+ __ dci(0x44f72c3d); // sqdmlalt z29.d, z1.s, z7.s[3]
+ // vl128 state = 0x1bbe9cc5
+ __ dci(0x44b62c35); // sqdmlalt z21.s, z1.h, z6.h[5]
+ // vl128 state = 0x99966225
+ __ dci(0x44b624b7); // sqdmlalt z23.s, z5.h, z6.h[4]
+ // vl128 state = 0x36da4a3a
+ __ dci(0x44f626b6); // sqdmlalt z22.d, z21.s, z6.s[2]
+ // vl128 state = 0xc009e514
+ __ dci(0x44f62226); // sqdmlalb z6.d, z17.s, z6.s[2]
+ // vl128 state = 0x2140ee4b
+ __ dci(0x44fa222e); // sqdmlalb z14.d, z17.s, z10.s[2]
+ // vl128 state = 0xf78c8bec
+ __ dci(0x44fa2aac); // sqdmlalb z12.d, z21.s, z10.s[3]
+ // vl128 state = 0x329238c6
+ __ dci(0x44fa2abc); // sqdmlalb z28.d, z21.s, z10.s[3]
+ // vl128 state = 0xadc9f9db
+ __ dci(0x44fa2aac); // sqdmlalb z12.d, z21.s, z10.s[3]
+ // vl128 state = 0x877f64cf
+ __ dci(0x44ba2a88); // sqdmlalb z8.s, z20.h, z2.h[7]
+ // vl128 state = 0x4e4a3117
+ __ dci(0x44fb2a89); // sqdmlalb z9.d, z20.s, z11.s[3]
+ // vl128 state = 0xe26b041b
+ __ dci(0x44f32ab9); // sqdmlalb z25.d, z21.s, z3.s[3]
+ // vl128 state = 0xbcf4e0b2
+ __ dci(0x44e328bd); // sqdmlalb z29.d, z5.s, z3.s[1]
+ // vl128 state = 0x31391cc2
+ __ dci(0x44f228ad); // sqdmlalb z13.d, z5.s, z2.s[3]
+ // vl128 state = 0xf4c6c098
+ __ dci(0x44e238af); // sqdmlslb z15.d, z5.s, z2.s[1]
+ // vl128 state = 0x6e7cb20c
+ __ dci(0x44e639ad); // sqdmlslb z13.d, z13.s, z6.s[1]
+ // vl128 state = 0xed16e292
+ __ dci(0x44a63daf); // sqdmlslt z15.s, z13.h, z6.h[1]
+ // vl128 state = 0x7c0c3a9a
+ __ dci(0x44ae3cbf); // sqdmlslt z31.s, z5.h, z6.h[3]
+ // vl128 state = 0x0e2dce8d
+ __ dci(0x44a634b7); // sqdmlslt z23.s, z5.h, z6.h[0]
+ // vl128 state = 0xf3eeab27
+ __ dci(0x44e234b5); // sqdmlslt z21.d, z5.s, z2.s[0]
+ // vl128 state = 0x55193209
+ __ dci(0x44a23437); // sqdmlslt z23.s, z1.h, z2.h[0]
+ // vl128 state = 0x7652b538
+ __ dci(0x44a63535); // sqdmlslt z21.s, z9.h, z6.h[0]
+ // vl128 state = 0x76046ab4
+ __ dci(0x44a235b4); // sqdmlslt z20.s, z13.h, z2.h[0]
+ // vl128 state = 0x2f23fd0d
+ __ dci(0x44a234e4); // sqdmlslt z4.s, z7.h, z2.h[0]
+ // vl128 state = 0x2a50774c
+ __ dci(0x44a234ec); // sqdmlslt z12.s, z7.h, z2.h[0]
+ // vl128 state = 0x01ea8843
+ __ dci(0x44a324e8); // sqdmlalt z8.s, z7.h, z3.h[0]
+ // vl128 state = 0xed54a157
+ __ dci(0x44a334c9); // sqdmlslt z9.s, z6.h, z3.h[0]
+ // vl128 state = 0x39e0227b
+ __ dci(0x44a324f9); // sqdmlalt z25.s, z7.h, z3.h[0]
+ // vl128 state = 0xf163fa0b
+ __ dci(0x44a224d8); // sqdmlalt z24.s, z6.h, z2.h[0]
+ // vl128 state = 0xbb4e0d24
+ __ dci(0x44b22448); // sqdmlalt z8.s, z2.h, z2.h[4]
+ // vl128 state = 0x26c102cc
+ __ dci(0x44f224d8); // sqdmlalt z24.d, z6.s, z2.s[2]
+ // vl128 state = 0x40f79dde
+ __ dci(0x44f220f9); // sqdmlalb z25.d, z7.s, z2.s[2]
+ // vl128 state = 0xf9d62034
+ __ dci(0x44f020a9); // sqdmlalb z9.d, z5.s, z0.s[2]
+ // vl128 state = 0x2b78be2f
+ __ dci(0x44f424ad); // sqdmlalt z13.d, z5.s, z4.s[2]
+ // vl128 state = 0xf0701e23
+ __ dci(0x44f430a5); // sqdmlslb z5.d, z5.s, z4.s[2]
+ // vl128 state = 0x992b12d6
+ __ dci(0x44f130a4); // sqdmlslb z4.d, z5.s, z1.s[2]
+ // vl128 state = 0x50292759
+ __ dci(0x44f130ac); // sqdmlslb z12.d, z5.s, z1.s[2]
+ // vl128 state = 0x795462f2
+ __ dci(0x44f3302d); // sqdmlslb z13.d, z1.s, z3.s[2]
+ // vl128 state = 0x8ac29815
+ __ dci(0x44e3300c); // sqdmlslb z12.d, z0.s, z3.s[0]
+ // vl128 state = 0x842471eb
+ __ dci(0x44e3300d); // sqdmlslb z13.d, z0.s, z3.s[0]
+ // vl128 state = 0x28762af1
+ __ dci(0x44eb321d); // sqdmlslb z29.d, z16.s, z11.s[0]
+ // vl128 state = 0x352de071
+ __ dci(0x44ef3259); // sqdmlslb z25.d, z18.s, z15.s[0]
+ // vl128 state = 0x90a4cf15
+ __ dci(0x44ff3349); // sqdmlslb z9.d, z26.s, z15.s[2]
+ // vl128 state = 0x6be7e76a
+ __ dci(0x44fb3319); // sqdmlslb z25.d, z24.s, z11.s[2]
+ // vl128 state = 0x7023e2de
+ __ dci(0x44bb3b18); // sqdmlslb z24.s, z24.h, z3.h[7]
+ // vl128 state = 0xad48664c
+ __ dci(0x44bb3b19); // sqdmlslb z25.s, z24.h, z3.h[7]
+ // vl128 state = 0xc7d8239b
+ __ dci(0x44bb3b11); // sqdmlslb z17.s, z24.h, z3.h[7]
+ // vl128 state = 0x0d9b2b9b
+ __ dci(0x44f33b15); // sqdmlslb z21.d, z24.s, z3.s[3]
+ // vl128 state = 0xbdb9c559
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0xbdb9c559,
+ 0x0c2f83d5,
+ 0x3e1f2607,
+ 0x2db954ea,
+ 0xff33857d,
+ 0xd567c205,
+ 0x8b5ced4c,
+ 0x19ecc4d9,
+ 0x8581949e,
+ 0x30f1a921,
+ 0x8c94071b,
+ 0xb9ad4919,
+ 0x32dbb108,
+ 0x634f9cd4,
+ 0x2a122429,
+ 0xdae127f1,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 floating-point multiply-add long
+// (FMLALB/FMLALT/FMLSLT). Uses the FP-specific initial machine state
+// (kFpInputSet), emits a fixed encoding stream via dci(), and compares a
+// machine-state hash against per-vector-length reference values.
+TEST_SVE(sve2_floating_multiply_add_long_vector) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm, kFpInputSet);
+ // state = 0x1e5cbcac
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x64bfa635); // fmlslt z21.s, z17.h, z31.h
+ // vl128 state = 0x48383595
+ __ dci(0x64bf867d); // fmlalt z29.s, z19.h, z31.h
+ // vl128 state = 0xf2812c0e
+ __ dci(0x64af877c); // fmlalt z28.s, z27.h, z15.h
+ // vl128 state = 0x161daf06
+ __ dci(0x64af8774); // fmlalt z20.s, z27.h, z15.h
+ // vl128 state = 0x8146f2bf
+ __ dci(0x64be877c); // fmlalt z28.s, z27.h, z30.h
+ // vl128 state = 0x90bcd864
+ __ dci(0x64bd876c); // fmlalt z12.s, z27.h, z29.h
+ // vl128 state = 0x22b60b78
+ __ dci(0x64bf8728); // fmlalt z8.s, z25.h, z31.h
+ // vl128 state = 0x2c9ce51a
+ __ dci(0x64bf836a); // fmlalb z10.s, z27.h, z31.h
+ // vl128 state = 0x40e6b398
+ __ dci(0x64bf87eb); // fmlalt z11.s, z31.h, z31.h
+ // vl128 state = 0x479c4a98
+ __ dci(0x64bf87e9); // fmlalt z9.s, z31.h, z31.h
+ // vl128 state = 0x25c987ad
+ __ dci(0x64b78779); // fmlalt z25.s, z27.h, z23.h
+ // vl128 state = 0xb4fbc429
+ __ dci(0x64b1877b); // fmlalt z27.s, z27.h, z17.h
+ // vl128 state = 0x390616d8
+ __ dci(0x64b1871f); // fmlalt z31.s, z24.h, z17.h
+ // vl128 state = 0x7f24d2bf
+ __ dci(0x64b5878f); // fmlalt z15.s, z28.h, z21.h
+ // vl128 state = 0x01a90318
+ __ dci(0x64b4870d); // fmlalt z13.s, z24.h, z20.h
+ // vl128 state = 0x08789c2c
+ __ dci(0x64b48709); // fmlalt z9.s, z24.h, z20.h
+ // vl128 state = 0x169f9b57
+ __ dci(0x64b48779); // fmlalt z25.s, z27.h, z20.h
+ // vl128 state = 0xad4f23d7
+ __ dci(0x64bc8671); // fmlalt z17.s, z19.h, z28.h
+ // vl128 state = 0xf86b0a64
+ __ dci(0x64b98673); // fmlalt z19.s, z19.h, z25.h
+ // vl128 state = 0x78a848b2
+ __ dci(0x64b18623); // fmlalt z3.s, z17.h, z17.h
+ // vl128 state = 0xcac211c9
+ __ dci(0x64b18642); // fmlalt z2.s, z18.h, z17.h
+ // vl128 state = 0x9afcbe3f
+ __ dci(0x64b1a6c0); // fmlslt z0.s, z22.h, z17.h
+ // vl128 state = 0x0047e4b2
+ __ dci(0x64b086c4); // fmlalt z4.s, z22.h, z16.h
+ // vl128 state = 0x203324b5
+ __ dci(0x64b28645); // fmlalt z5.s, z18.h, z18.h
+ // vl128 state = 0x7340c432
+ __ dci(0x64b28264); // fmlalb z4.s, z19.h, z18.h
+ // vl128 state = 0x6dc657a9
+ __ dci(0x64b28765); // fmlalt z5.s, z27.h, z18.h
+ // vl128 state = 0xa5d3889b
+ __ dci(0x64ba8561); // fmlalt z1.s, z11.h, z26.h
+ // vl128 state = 0x5bbd2dd9
+ __ dci(0x64aa8543); // fmlalt z3.s, z10.h, z10.h
+ // vl128 state = 0xa65ec305
+ __ dci(0x64ae8141); // fmlalb z1.s, z10.h, z14.h
+ // vl128 state = 0xd23d588c
+ __ dci(0x64ae80c3); // fmlalb z3.s, z6.h, z14.h
+ // vl128 state = 0x5a082bbc
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0x5a082bbc,
+ 0x23c41852,
+ 0xf462f328,
+ 0x6fa4d12b,
+ 0x5e5f3e79,
+ 0x9939c7e6,
+ 0x0ed39313,
+ 0x2911107c,
+ 0x18f77b9a,
+ 0x7226d5b3,
+ 0x05df3c07,
+ 0x1653749c,
+ 0xcb4f6acf,
+ 0x4c5f0755,
+ 0xc4eed654,
+ 0x47893eeb,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 integer multiply-add long (indexed
+// form: SMLALB/SMLALT/SMLSLB/SMLSLT and UMLALB/UMLALT/UMLSLB/UMLSLT). Emits
+// a fixed encoding stream via dci() and compares a machine-state hash
+// against per-vector-length reference values.
+TEST_SVE(sve2_mla_long_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44ea8d67); // smlalt z7.d, z11.s, z10.s[1]
+ // vl128 state = 0xd08dbe24
+ __ dci(0x44ea9d2f); // umlalt z15.d, z9.s, z10.s[1]
+ // vl128 state = 0x56f6f237
+ __ dci(0x44ea9d2d); // umlalt z13.d, z9.s, z10.s[1]
+ // vl128 state = 0x00f89e4d
+ __ dci(0x44eb992f); // umlalb z15.d, z9.s, z11.s[1]
+ // vl128 state = 0xca4e469e
+ __ dci(0x44ab99ae); // umlalb z14.s, z13.h, z3.h[3]
+ // vl128 state = 0xd4b18276
+ __ dci(0x44ad99be); // umlalb z30.s, z13.h, z5.h[3]
+ // vl128 state = 0x8650a79e
+ __ dci(0x44ad99ba); // umlalb z26.s, z13.h, z5.h[3]
+ // vl128 state = 0x6fa1a501
+ __ dci(0x44adb9f2); // umlslb z18.s, z15.h, z5.h[3]
+ // vl128 state = 0x1a56a5d4
+ __ dci(0x44bda9f3); // smlslb z19.s, z15.h, z5.h[7]
+ // vl128 state = 0xfdb18057
+ __ dci(0x44b9a1fb); // smlslb z27.s, z15.h, z1.h[6]
+ // vl128 state = 0xb46b6c28
+ __ dci(0x44b8a1b3); // smlslb z19.s, z13.h, z0.h[6]
+ // vl128 state = 0x623c62c3
+ __ dci(0x44bc81b1); // smlalb z17.s, z13.h, z4.h[6]
+ // vl128 state = 0x2abab4d3
+ __ dci(0x44bc82b0); // smlalb z16.s, z21.h, z4.h[6]
+ // vl128 state = 0x7a028731
+ __ dci(0x44ac92b8); // umlalb z24.s, z21.h, z4.h[2]
+ // vl128 state = 0xf48f6936
+ __ dci(0x44a4923a); // umlalb z26.s, z17.h, z4.h[0]
+ // vl128 state = 0xbcdf888d
+ __ dci(0x44b49a3e); // umlalb z30.s, z17.h, z4.h[5]
+ // vl128 state = 0x5060778e
+ __ dci(0x44b69a1c); // umlalb z28.s, z16.h, z6.h[5]
+ // vl128 state = 0x16da3835
+ __ dci(0x44b6b218); // umlslb z24.s, z16.h, z6.h[4]
+ // vl128 state = 0xac7fb4d0
+ __ dci(0x44b2b25a); // umlslb z26.s, z18.h, z2.h[4]
+ // vl128 state = 0x8d05433b
+ __ dci(0x44b2ba0a); // umlslb z10.s, z16.h, z2.h[5]
+ // vl128 state = 0x62630101
+ __ dci(0x44b29b08); // umlalb z8.s, z24.h, z2.h[5]
+ // vl128 state = 0x31ae445b
+ __ dci(0x44b29b00); // umlalb z0.s, z24.h, z2.h[5]
+ // vl128 state = 0x539a5875
+ __ dci(0x44b29e08); // umlalt z8.s, z16.h, z2.h[5]
+ // vl128 state = 0x07d4bf73
+ __ dci(0x44b29eaa); // umlalt z10.s, z21.h, z2.h[5]
+ // vl128 state = 0x314f48a8
+ __ dci(0x44b2be2e); // umlslt z14.s, z17.h, z2.h[5]
+ // vl128 state = 0x91bd2c17
+ __ dci(0x44b2be3e); // umlslt z30.s, z17.h, z2.h[5]
+ // vl128 state = 0x4cbf4360
+ __ dci(0x44f2be7a); // umlslt z26.d, z19.s, z2.s[3]
+ // vl128 state = 0xe94e76a9
+ __ dci(0x44f2ae4a); // smlslt z10.d, z18.s, z2.s[3]
+ // vl128 state = 0xd0c2c4cc
+ __ dci(0x44faae6e); // smlslt z14.d, z19.s, z10.s[3]
+ // vl128 state = 0xc64d6839
+ __ dci(0x44faae6f); // smlslt z15.d, z19.s, z10.s[3]
+ // vl128 state = 0xa74358aa
+ __ dci(0x44faae67); // smlslt z7.d, z19.s, z10.s[3]
+ // vl128 state = 0xb8d9664b
+ __ dci(0x44fa8e57); // smlalt z23.d, z18.s, z10.s[3]
+ // vl128 state = 0xf1032ab4
+ __ dci(0x44fa8c67); // smlalt z7.d, z3.s, z10.s[3]
+ // vl128 state = 0x763732f4
+ __ dci(0x44eaac66); // smlslt z6.d, z3.s, z10.s[1]
+ // vl128 state = 0xdcf39367
+ __ dci(0x44eaa456); // smlslt z22.d, z2.s, z10.s[0]
+ // vl128 state = 0x5ea67d82
+ __ dci(0x44aea45e); // smlslt z30.s, z2.h, z6.h[2]
+ // vl128 state = 0x55da0908
+ __ dci(0x44aaa64e); // smlslt z14.s, z18.h, z2.h[2]
+ // vl128 state = 0x69d105f5
+ __ dci(0x44baa75e); // smlslt z30.s, z26.h, z2.h[6]
+ // vl128 state = 0x191bc065
+ __ dci(0x44baa75a); // smlslt z26.s, z26.h, z2.h[6]
+ // vl128 state = 0xbf62d2a0
+ __ dci(0x44eaa75b); // smlslt z27.d, z26.s, z10.s[0]
+ // vl128 state = 0x43803a21
+ __ dci(0x44eabf5f); // umlslt z31.d, z26.s, z10.s[1]
+ // vl128 state = 0x0b33725c
+ __ dci(0x44ebbd57); // umlslt z23.d, z10.s, z11.s[1]
+ // vl128 state = 0x0059a0f5
+ __ dci(0x44abbf55); // umlslt z21.s, z26.h, z3.h[3]
+ // vl128 state = 0xb587057f
+ __ dci(0x44abab5d); // smlslb z29.s, z26.h, z3.h[3]
+ // vl128 state = 0x0bfa30c6
+ __ dci(0x44abab5c); // smlslb z28.s, z26.h, z3.h[3]
+ // vl128 state = 0x151045b4
+ __ dci(0x44abaf78); // smlslt z24.s, z27.h, z3.h[3]
+ // vl128 state = 0xedb7fca9
+ __ dci(0x44aaa77c); // smlslt z28.s, z27.h, z2.h[2]
+ // vl128 state = 0xb68216f9
+ __ dci(0x44aaa178); // smlslb z24.s, z11.h, z2.h[2]
+ // vl128 state = 0x35447b11
+ __ dci(0x44aa81fa); // smlalb z26.s, z15.h, z2.h[2]
+ // vl128 state = 0xf532285f
+ __ dci(0x44aa8198); // smlalb z24.s, z12.h, z2.h[2]
+ // vl128 state = 0xd414889b
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0xd414889b,
+ 0x79d8f659,
+ 0xe2c8f06b,
+ 0x91aadf3d,
+ 0xffb92c3e,
+ 0xc2d3138e,
+ 0xdd9f4396,
+ 0xce39a88e,
+ 0xfe68a5ca,
+ 0xdcb072b2,
+ 0x3756ede6,
+ 0x5c2eef22,
+ 0x01fd02a4,
+ 0xdd8d4890,
+ 0x87500dc9,
+ 0x8c895325,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Generated regression test for SVE2 integer multiply long (indexed form:
+// SMULLB/SMULLT/UMULLB/UMULLT with element index). Emits a fixed encoding
+// stream via dci() and compares a machine-state hash against
+// per-vector-length reference values.
+TEST_SVE(sve2_mul_long_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44f1d492); // umullt z18.d, z4.s, z1.s[2]
+ // vl128 state = 0x4377a821
+ __ dci(0x44fdd490); // umullt z16.d, z4.s, z13.s[2]
+ // vl128 state = 0x5879cb00
+ __ dci(0x44fdc080); // smullb z0.d, z4.s, z13.s[2]
+ // vl128 state = 0xbe0f85f8
+ __ dci(0x44fdc081); // smullb z1.d, z4.s, z13.s[2]
+ // vl128 state = 0xa0eb0d63
+ __ dci(0x44fcc000); // smullb z0.d, z0.s, z12.s[2]
+ // vl128 state = 0xf023feb2
+ __ dci(0x44ffc001); // smullb z1.d, z0.s, z15.s[2]
+ // vl128 state = 0xcc0dcc10
+ __ dci(0x44ffc0c9); // smullb z9.d, z6.s, z15.s[2]
+ // vl128 state = 0x8e0d2525
+ __ dci(0x44f7d0c8); // umullb z8.d, z6.s, z7.s[2]
+ // vl128 state = 0xaf711253
+ __ dci(0x44b7d080); // umullb z0.s, z4.h, z7.h[4]
+ // vl128 state = 0x8cea3501
+ __ dci(0x44f7d290); // umullb z16.d, z20.s, z7.s[2]
+ // vl128 state = 0x09be9a84
+ __ dci(0x44f6da92); // umullb z18.d, z20.s, z6.s[3]
+ // vl128 state = 0x3906715f
+ __ dci(0x44fed296); // umullb z22.d, z20.s, z14.s[2]
+ // vl128 state = 0xf399bb76
+ __ dci(0x44f6c292); // smullb z18.d, z20.s, z6.s[2]
+ // vl128 state = 0x33ceff98
+ __ dci(0x44e6c2a2); // smullb z2.d, z21.s, z6.s[0]
+ // vl128 state = 0x00765739
+ __ dci(0x44e6c323); // smullb z3.d, z25.s, z6.s[0]
+ // vl128 state = 0x3dad5b1f
+ __ dci(0x44e6c333); // smullb z19.d, z25.s, z6.s[0]
+ // vl128 state = 0xc5b39601
+ __ dci(0x44e7c377); // smullb z23.d, z27.s, z7.s[0]
+ // vl128 state = 0x134b3d1f
+ __ dci(0x44e7d3ff); // umullb z31.d, z31.s, z7.s[0]
+ // vl128 state = 0xc4be3961
+ __ dci(0x44e7d3fe); // umullb z30.d, z31.s, z7.s[0]
+ // vl128 state = 0x195e406b
+ __ dci(0x44e7c3da); // smullb z26.d, z30.s, z7.s[0]
+ // vl128 state = 0xae2522f9
+ __ dci(0x44e7c2fe); // smullb z30.d, z23.s, z7.s[0]
+ // vl128 state = 0xed267bfb
+ __ dci(0x44e3c3f6); // smullb z22.d, z31.s, z3.s[0]
+ // vl128 state = 0x6f6eeec4
+ __ dci(0x44f3c2f2); // smullb z18.d, z23.s, z3.s[2]
+ // vl128 state = 0x1689afdf
+ __ dci(0x44f3c2e2); // smullb z2.d, z23.s, z3.s[2]
+ // vl128 state = 0x24999374
+ __ dci(0x44f3c06a); // smullb z10.d, z3.s, z3.s[2]
+ // vl128 state = 0x046126eb
+ __ dci(0x44f3c06b); // smullb z11.d, z3.s, z3.s[2]
+ // vl128 state = 0x6b39941f
+ __ dci(0x44f3c449); // smullt z9.d, z2.s, z3.s[2]
+ // vl128 state = 0xf161bcc6
+ __ dci(0x44f3ccc8); // smullt z8.d, z6.s, z3.s[3]
+ // vl128 state = 0xbdc67c89
+ __ dci(0x44f9ccd8); // smullt z24.d, z6.s, z9.s[3]
+ // vl128 state = 0xfed59871
+ __ dci(0x44ffccdc); // smullt z28.d, z6.s, z15.s[3]
+ // vl128 state = 0x72746ff6
+ __ dci(0x44fecc58); // smullt z24.d, z2.s, z14.s[3]
+ // vl128 state = 0xa15ee8f2
+ __ dci(0x44bfcc48); // smullt z8.s, z2.h, z7.h[7]
+ // vl128 state = 0x3dccd2d6
+ __ dci(0x44b7c84a); // smullb z10.s, z2.h, z7.h[5]
+ // vl128 state = 0x4537f0b2
+ __ dci(0x44a5c84e); // smullb z14.s, z2.h, z5.h[1]
+ // vl128 state = 0x60e30690
+ __ dci(0x44adca46); // smullb z6.s, z18.h, z5.h[3]
+ // vl128 state = 0xaef15cb5
+ __ dci(0x44add847); // umullb z7.s, z2.h, z5.h[3]
+ // vl128 state = 0xe7df553d
+ __ dci(0x44bdd04f); // umullb z15.s, z2.h, z5.h[6]
+ // vl128 state = 0xa713f809
+ __ dci(0x44bdc007); // smullb z7.s, z0.h, z5.h[6]
+ // vl128 state = 0x4907c6b7
+ __ dci(0x44bdc005); // smullb z5.s, z0.h, z5.h[6]
+ // vl128 state = 0x98a83fd0
+ __ dci(0x44bdc0b5); // smullb z21.s, z5.h, z5.h[6]
+ // vl128 state = 0x3e6cb588
+ __ dci(0x44bcc094); // smullb z20.s, z4.h, z4.h[6]
+ // vl128 state = 0x37e5a4ce
+ __ dci(0x44bcc09c); // smullb z28.s, z4.h, z4.h[6]
+ // vl128 state = 0x719de631
+ __ dci(0x44acc88c); // smullb z12.s, z4.h, z4.h[3]
+ // vl128 state = 0xf0f3dffe
+ __ dci(0x44aac884); // smullb z4.s, z4.h, z2.h[3]
+ // vl128 state = 0x61a714ff
+ __ dci(0x44a8c8ac); // smullb z12.s, z5.h, z0.h[3]
+ // vl128 state = 0xc47542ea
+ __ dci(0x44a8cea4); // smullt z4.s, z21.h, z0.h[3]
+ // vl128 state = 0x37865031
+ __ dci(0x44a8daa5); // umullb z5.s, z21.h, z0.h[3]
+ // vl128 state = 0x28cf4dc6
+ __ dci(0x44b8dae4); // umullb z4.s, z23.h, z0.h[7]
+ // vl128 state = 0x6fe181d0
+ __ dci(0x44b9da6c); // umullb z12.s, z19.h, z1.h[7]
+ // vl128 state = 0xde65c7e3
+ __ dci(0x44b9da64); // umullb z4.s, z19.h, z1.h[7]
+ // vl128 state = 0x040a7e45
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One reference hash per vector length; indexed by 128-bit lane count - 1.
+ uint32_t expected_hashes[] = {
+ 0x040a7e45,
+ 0x48fc4c2b,
+ 0x9a1c67d1,
+ 0xcb88ffdd,
+ 0xcda205bc,
+ 0x7a47b6fb,
+ 0x68ae16c8,
+ 0x483353c9,
+ 0x91d91835,
+ 0x17a9ca4a,
+ 0x4f3d394f,
+ 0x5182776c,
+ 0xc03c1d3b,
+ 0xe52799db,
+ 0x1ddd328e,
+ 0xe33903de,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random encoding test for SVE2 saturating (rounding) doubling
+// multiply high (SQDMULH/SQRDMULH, vector form). Appears auto-generated:
+// each dci() emits a fixed 32-bit encoding inside an ExactAssemblyScope, and
+// each "vl128 state" comment records the machine-state hash expected after
+// that instruction when the vector length is 128 bits.
+// NOTE(review): kCRC32 is presumably required by ComputeMachineStateHash —
+// TODO confirm.
+TEST_SVE(sve2_sat_double_mul_high) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x046c711a); // sqdmulh z26.h, z8.h, z12.h
+ // vl128 state = 0xe962209c
+ __ dci(0x047c7138); // sqdmulh z24.h, z9.h, z28.h
+ // vl128 state = 0x06a43320
+ __ dci(0x04fc7539); // sqrdmulh z25.d, z9.d, z28.d
+ // vl128 state = 0x8ce1cad6
+ __ dci(0x04fc7029); // sqdmulh z9.d, z1.d, z28.d
+ // vl128 state = 0x6f3d1b22
+ __ dci(0x04ac702d); // sqdmulh z13.s, z1.s, z12.s
+ // vl128 state = 0x14b0451c
+ __ dci(0x04a4742c); // sqrdmulh z12.s, z1.s, z4.s
+ // vl128 state = 0x60206a6a
+ __ dci(0x04a574ad); // sqrdmulh z13.s, z5.s, z5.s
+ // vl128 state = 0x388a9786
+ __ dci(0x04a574a9); // sqrdmulh z9.s, z5.s, z5.s
+ // vl128 state = 0xee590c43
+ __ dci(0x04e574e8); // sqrdmulh z8.d, z7.d, z5.d
+ // vl128 state = 0x8d16295c
+ __ dci(0x04e570ca); // sqdmulh z10.d, z6.d, z5.d
+ // vl128 state = 0x2a5c234c
+ __ dci(0x04e670cb); // sqdmulh z11.d, z6.d, z6.d
+ // vl128 state = 0xfacc9e06
+ __ dci(0x04f6708f); // sqdmulh z15.d, z4.d, z22.d
+ // vl128 state = 0x2167ca56
+ __ dci(0x04f67087); // sqdmulh z7.d, z4.d, z22.d
+ // vl128 state = 0xc7d7af1d
+ __ dci(0x04f77185); // sqdmulh z5.d, z12.d, z23.d
+ // vl128 state = 0x15f82ac2
+ __ dci(0x04f67104); // sqdmulh z4.d, z8.d, z22.d
+ // vl128 state = 0xb2484707
+ __ dci(0x04f6710c); // sqdmulh z12.d, z8.d, z22.d
+ // vl128 state = 0x5a53b8e7
+ __ dci(0x04f6708d); // sqdmulh z13.d, z4.d, z22.d
+ // vl128 state = 0xa9affac2
+ __ dci(0x04f67085); // sqdmulh z5.d, z4.d, z22.d
+ // vl128 state = 0xa425052d
+ __ dci(0x04fe7281); // sqdmulh z1.d, z20.d, z30.d
+ // vl128 state = 0x1c0f565c
+ __ dci(0x04ee72d1); // sqdmulh z17.d, z22.d, z14.d
+ // vl128 state = 0xff12c401
+ __ dci(0x04ee7393); // sqdmulh z19.d, z28.d, z14.d
+ // vl128 state = 0xcd1d9d3a
+ __ dci(0x04ec73b2); // sqdmulh z18.d, z29.d, z12.d
+ // vl128 state = 0x2aa94767
+ __ dci(0x04ee73fa); // sqdmulh z26.d, z31.d, z14.d
+ // vl128 state = 0x5ca68e9c
+ __ dci(0x04ef77ea); // sqrdmulh z10.d, z31.d, z15.d
+ // vl128 state = 0xe5b65473
+ __ dci(0x04ff76e8); // sqrdmulh z8.d, z23.d, z31.d
+ // vl128 state = 0xcc4e8803
+ __ dci(0x04fd76c9); // sqrdmulh z9.d, z22.d, z29.d
+ // vl128 state = 0x19fff884
+ __ dci(0x04fd73d9); // sqdmulh z25.d, z30.d, z29.d
+ // vl128 state = 0xb99d6147
+ __ dci(0x04e973dd); // sqdmulh z29.d, z30.d, z9.d
+ // vl128 state = 0xe8f11301
+ __ dci(0x04b973dc); // sqdmulh z28.s, z30.s, z25.s
+ // vl128 state = 0x24af5ffe
+ __ dci(0x04b177dd); // sqrdmulh z29.s, z30.s, z17.s
+ // vl128 state = 0x5c32a08e
+ __ dci(0x04b177bc); // sqrdmulh z28.s, z29.s, z17.s
+ // vl128 state = 0x12c8c1c4
+ __ dci(0x04f377ac); // sqrdmulh z12.d, z29.d, z19.d
+ // vl128 state = 0x7bc1f2e6
+ __ dci(0x04f677ad); // sqrdmulh z13.d, z29.d, z22.d
+ // vl128 state = 0x67d2640f
+ __ dci(0x04fe76af); // sqrdmulh z15.d, z21.d, z30.d
+ // vl128 state = 0x98035fbd
+ __ dci(0x04ef76ae); // sqrdmulh z14.d, z21.d, z15.d
+ // vl128 state = 0x5e561fd3
+ __ dci(0x04ee72ac); // sqdmulh z12.d, z21.d, z14.d
+ // vl128 state = 0xb56c3914
+ __ dci(0x04ae72ee); // sqdmulh z14.s, z23.s, z14.s
+ // vl128 state = 0x6bb1c4b1
+ __ dci(0x04be7266); // sqdmulh z6.s, z19.s, z30.s
+ // vl128 state = 0x5a5bdda6
+ __ dci(0x04b67364); // sqdmulh z4.s, z27.s, z22.s
+ // vl128 state = 0x09a447ea
+ __ dci(0x04b27165); // sqdmulh z5.s, z11.s, z18.s
+ // vl128 state = 0xee84be35
+ __ dci(0x04b27175); // sqdmulh z21.s, z11.s, z18.s
+ // vl128 state = 0x84146d85
+ __ dci(0x04ba7137); // sqdmulh z23.s, z9.s, z26.s
+ // vl128 state = 0x92c2e5f6
+ __ dci(0x04b3713f); // sqdmulh z31.s, z9.s, z19.s
+ // vl128 state = 0xe3836fb8
+ __ dci(0x04b37017); // sqdmulh z23.s, z0.s, z19.s
+ // vl128 state = 0xb5225206
+ __ dci(0x04b37615); // sqrdmulh z21.s, z16.s, z19.s
+ // vl128 state = 0x157484c7
+ __ dci(0x04b37491); // sqrdmulh z17.s, z4.s, z19.s
+ // vl128 state = 0x586c4bbf
+ __ dci(0x04b37481); // sqrdmulh z1.s, z4.s, z19.s
+ // vl128 state = 0xf5dc07cb
+ __ dci(0x04b37489); // sqrdmulh z9.s, z4.s, z19.s
+ // vl128 state = 0x591875a8
+ __ dci(0x04b5748d); // sqrdmulh z13.s, z4.s, z21.s
+ // vl128 state = 0xb01f8fd5
+ __ dci(0x043d748f); // sqrdmulh z15.b, z4.b, z29.b
+ // vl128 state = 0xd466a58c
+ }
+
+ // Hash the full simulated machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Golden hashes indexed by (Q-register lane count - 1): one entry per
+ // supported VL; entry 0 equals the final "vl128 state" above.
+ uint32_t expected_hashes[] = {
+ 0xd466a58c,
+ 0xe2ec7fba,
+ 0x1644e93a,
+ 0x7c3ecb2e,
+ 0xed4ecd78,
+ 0xfd5b5783,
+ 0xa7094efe,
+ 0x92bd623f,
+ 0x6da5e423,
+ 0x1648b588,
+ 0x63ce5947,
+ 0xba9c7d90,
+ 0x756ae20d,
+ 0x6d4032ba,
+ 0x87ae8b8f,
+ 0x722b2f6f,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random encoding test for SVE2 complex multiply-add with rotate,
+// indexed form (CMLA z, z, z[idx], #rot). Appears auto-generated: each dci()
+// emits a fixed 32-bit encoding inside an ExactAssemblyScope; the "vl128
+// state" comments give the expected machine-state hash after each
+// instruction at a 128-bit vector length.
+// NOTE(review): kCRC32 is presumably required by ComputeMachineStateHash —
+// TODO confirm.
+TEST_SVE(sve2_cmla_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x44e867e6); // cmla z6.s, z31.s, z8.s[0], #90
+ // vl128 state = 0xee56e69b
+ __ dci(0x44e86de4); // cmla z4.s, z15.s, z8.s[0], #270
+ // vl128 state = 0x0ed2e9f5
+ __ dci(0x44e86be5); // cmla z5.s, z31.s, z8.s[0], #180
+ // vl128 state = 0x9074e2a6
+ __ dci(0x44eb6bf5); // cmla z21.s, z31.s, z11.s[0], #180
+ // vl128 state = 0x8f43b8a8
+ __ dci(0x44eb6b31); // cmla z17.s, z25.s, z11.s[0], #180
+ // vl128 state = 0xb6c51b97
+ __ dci(0x44eb6135); // cmla z21.s, z9.s, z11.s[0], #0
+ // vl128 state = 0x4236beed
+ __ dci(0x44e9633d); // cmla z29.s, z25.s, z9.s[0], #0
+ // vl128 state = 0x21879fe6
+ __ dci(0x44f96379); // cmla z25.s, z27.s, z9.s[1], #0
+ // vl128 state = 0x78172805
+ __ dci(0x44fd6349); // cmla z9.s, z26.s, z13.s[1], #0
+ // vl128 state = 0x242a3ae5
+ __ dci(0x44f76341); // cmla z1.s, z26.s, z7.s[1], #0
+ // vl128 state = 0xa734ef3b
+ __ dci(0x44f36305); // cmla z5.s, z24.s, z3.s[1], #0
+ // vl128 state = 0x00a035b1
+ __ dci(0x44f76381); // cmla z1.s, z28.s, z7.s[1], #0
+ // vl128 state = 0xbdfda3d4
+ __ dci(0x44f763e3); // cmla z3.s, z31.s, z7.s[1], #0
+ // vl128 state = 0xe1ed6ed9
+ __ dci(0x44b763cb); // cmla z11.h, z30.h, z7.h[2], #0
+ // vl128 state = 0xae645ea8
+ __ dci(0x44a763e9); // cmla z9.h, z31.h, z7.h[0], #0
+ // vl128 state = 0x392b3511
+ __ dci(0x44a762ab); // cmla z11.h, z21.h, z7.h[0], #0
+ // vl128 state = 0x3a05f729
+ __ dci(0x44a66aaf); // cmla z15.h, z21.h, z6.h[0], #180
+ // vl128 state = 0x7cfa0c08
+ __ dci(0x44a66aa7); // cmla z7.h, z21.h, z6.h[0], #180
+ // vl128 state = 0x91749f43
+ __ dci(0x44a663a5); // cmla z5.h, z29.h, z6.h[0], #0
+ // vl128 state = 0x438479ab
+ __ dci(0x44a66bed); // cmla z13.h, z31.h, z6.h[0], #180
+ // vl128 state = 0xc25ce86d
+ __ dci(0x44f66be9); // cmla z9.s, z31.s, z6.s[1], #180
+ // vl128 state = 0x6e8bdeca
+ __ dci(0x44b66bd9); // cmla z25.h, z30.h, z6.h[2], #180
+ // vl128 state = 0x04745a63
+ __ dci(0x44b66bd8); // cmla z24.h, z30.h, z6.h[2], #180
+ // vl128 state = 0xbfc59a82
+ __ dci(0x44b66b7c); // cmla z28.h, z27.h, z6.h[2], #180
+ // vl128 state = 0x12d70fc2
+ __ dci(0x44b6617e); // cmla z30.h, z11.h, z6.h[2], #0
+ // vl128 state = 0x53f4b9a1
+ __ dci(0x44b7697c); // cmla z28.h, z11.h, z7.h[2], #180
+ // vl128 state = 0x74e99c24
+ __ dci(0x44b3692c); // cmla z12.h, z9.h, z3.h[2], #180
+ // vl128 state = 0xdc80a875
+ __ dci(0x44a1692e); // cmla z14.h, z9.h, z1.h[0], #180
+ // vl128 state = 0x307af313
+ __ dci(0x44b169af); // cmla z15.h, z13.h, z1.h[2], #180
+ // vl128 state = 0xc92b23fe
+ __ dci(0x44b165a7); // cmla z7.h, z13.h, z1.h[2], #90
+ // vl128 state = 0x33a52d1c
+ __ dci(0x44b165a5); // cmla z5.h, z13.h, z1.h[2], #90
+ // vl128 state = 0xbc53ebfc
+ __ dci(0x44f161a1); // cmla z1.s, z13.s, z1.s[1], #0
+ // vl128 state = 0x7ba34076
+ __ dci(0x44f261a0); // cmla z0.s, z13.s, z2.s[1], #0
+ // vl128 state = 0x6fa2bab8
+ __ dci(0x44b361b0); // cmla z16.h, z13.h, z3.h[2], #0
+ // vl128 state = 0xaae67807
+ __ dci(0x44b36092); // cmla z18.h, z4.h, z3.h[2], #0
+ // vl128 state = 0xf1b05dff
+ __ dci(0x44b36202); // cmla z2.h, z16.h, z3.h[2], #0
+ // vl128 state = 0xd226bf15
+ __ dci(0x44b36a20); // cmla z0.h, z17.h, z3.h[2], #180
+ // vl128 state = 0x6a8ade58
+ __ dci(0x44b26a10); // cmla z16.h, z16.h, z2.h[2], #180
+ // vl128 state = 0x075e00e4
+ __ dci(0x44b26a18); // cmla z24.h, z16.h, z2.h[2], #180
+ // vl128 state = 0x9bcef7bd
+ __ dci(0x44b06a28); // cmla z8.h, z17.h, z0.h[2], #180
+ // vl128 state = 0x8ac6d4b3
+ __ dci(0x44b06a2a); // cmla z10.h, z17.h, z0.h[2], #180
+ // vl128 state = 0x51993d51
+ __ dci(0x44b0620b); // cmla z11.h, z16.h, z0.h[2], #0
+ // vl128 state = 0x6d134734
+ __ dci(0x44b06209); // cmla z9.h, z16.h, z0.h[2], #0
+ // vl128 state = 0x0ee4031f
+ __ dci(0x44f06a0d); // cmla z13.s, z16.s, z0.s[1], #180
+ // vl128 state = 0x08ea247b
+ __ dci(0x44f06b2c); // cmla z12.s, z25.s, z0.s[1], #180
+ // vl128 state = 0x6acbb19a
+ __ dci(0x44f1692d); // cmla z13.s, z9.s, z1.s[1], #180
+ // vl128 state = 0x3ea2d161
+ __ dci(0x44b36925); // cmla z5.h, z9.h, z3.h[2], #180
+ // vl128 state = 0x5b962e9b
+ __ dci(0x44b36921); // cmla z1.h, z9.h, z3.h[2], #180
+ // vl128 state = 0x029f0eca
+ __ dci(0x44b36d69); // cmla z9.h, z11.h, z3.h[2], #270
+ // vl128 state = 0x39a63c65
+ __ dci(0x44bb6d28); // cmla z8.h, z9.h, z3.h[3], #270
+ // vl128 state = 0x6d58c136
+ }
+
+ // Hash the full simulated machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Golden hashes indexed by (Q-register lane count - 1): one entry per
+ // supported VL; entry 0 equals the final "vl128 state" above.
+ uint32_t expected_hashes[] = {
+ 0x6d58c136,
+ 0xfbdbae97,
+ 0x85c3cf1a,
+ 0xe4b53177,
+ 0x2f714586,
+ 0xde1afee8,
+ 0xd9613d2e,
+ 0x842c85a6,
+ 0xdc285523,
+ 0xccba7ba9,
+ 0x79e1e6f7,
+ 0xb19427f4,
+ 0x20d08a3a,
+ 0xfb7f4c43,
+ 0x0721ed60,
+ 0x4ee795ab,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random encoding test for the SVE2 FLOGB (floating-point base-2
+// logarithm of exponent) instruction, predicated/merging form. Appears
+// auto-generated: each dci() emits a fixed 32-bit encoding inside an
+// ExactAssemblyScope; the "vl128 state" comments give the expected
+// machine-state hash after each instruction at a 128-bit vector length.
+// NOTE(review): kCRC32 is presumably required by ComputeMachineStateHash —
+// TODO confirm.
+TEST_SVE(sve2_flogb) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x651cb31e); // flogb z30.s, p4/m, z24.s
+ // vl128 state = 0x161f1855
+ __ dci(0x651cb3ae); // flogb z14.s, p4/m, z29.s
+ // vl128 state = 0xf9e5ce4d
+ __ dci(0x651cb3be); // flogb z30.s, p4/m, z29.s
+ // vl128 state = 0xa06176bc
+ __ dci(0x651ea3bc); // flogb z28.d, p0/m, z29.d
+ // vl128 state = 0xf793f7bb
+ __ dci(0x651ea3cc); // flogb z12.d, p0/m, z30.d
+ // vl128 state = 0xe5d71081
+ __ dci(0x651ea3dc); // flogb z28.d, p0/m, z30.d
+ // vl128 state = 0x33ffc09f
+ __ dci(0x651ea3d4); // flogb z20.d, p0/m, z30.d
+ // vl128 state = 0xd908a72e
+ __ dci(0x651ea3d5); // flogb z21.d, p0/m, z30.d
+ // vl128 state = 0x9528251a
+ __ dci(0x651ca394); // flogb z20.s, p0/m, z28.s
+ // vl128 state = 0xb1ac4188
+ __ dci(0x651ca396); // flogb z22.s, p0/m, z28.s
+ // vl128 state = 0xdc328726
+ __ dci(0x651ca1d7); // flogb z23.s, p0/m, z14.s
+ // vl128 state = 0xfc232eb7
+ __ dci(0x651ca947); // flogb z7.s, p2/m, z10.s
+ // vl128 state = 0xa9c53a1a
+ __ dci(0x651ca805); // flogb z5.s, p2/m, z0.s
+ // vl128 state = 0x9e4a47e9
+ __ dci(0x651ea841); // flogb z1.d, p2/m, z2.d
+ // vl128 state = 0x7a2aeaf6
+ __ dci(0x651ea843); // flogb z3.d, p2/m, z2.d
+ // vl128 state = 0xedd4aa97
+ __ dci(0x651caa4b); // flogb z11.s, p2/m, z18.s
+ // vl128 state = 0x7bfefefb
+ __ dci(0x651cab6f); // flogb z15.s, p2/m, z27.s
+ // vl128 state = 0x91b5a183
+ __ dci(0x651ca86b); // flogb z11.s, p2/m, z3.s
+ // vl128 state = 0x7b2776c2
+ __ dci(0x651ca47b); // flogb z27.s, p1/m, z3.s
+ // vl128 state = 0x46ea46c7
+ __ dci(0x651ca47f); // flogb z31.s, p1/m, z3.s
+ // vl128 state = 0x6e1d4e89
+ __ dci(0x651ca477); // flogb z23.s, p1/m, z3.s
+ // vl128 state = 0x5ea1220c
+ __ dci(0x651ca035); // flogb z21.s, p0/m, z1.s
+ // vl128 state = 0xb06e32be
+ __ dci(0x651ca2a5); // flogb z5.s, p0/m, z21.s
+ // vl128 state = 0xb856d206
+ __ dci(0x651caa2d); // flogb z13.s, p2/m, z17.s
+ // vl128 state = 0xebfd587f
+ __ dci(0x651caa3d); // flogb z29.s, p2/m, z17.s
+ // vl128 state = 0xb029ba8d
+ __ dci(0x651eaa7f); // flogb z31.d, p2/m, z19.d
+ // vl128 state = 0x07fd3f42
+ __ dci(0x651ebb7e); // flogb z30.d, p6/m, z27.d
+ // vl128 state = 0x79761d7a
+ __ dci(0x651ebb76); // flogb z22.d, p6/m, z27.d
+ // vl128 state = 0xdf56dd22
+ __ dci(0x651ebb72); // flogb z18.d, p6/m, z27.d
+ // vl128 state = 0xce798ad7
+ __ dci(0x651eb276); // flogb z22.d, p4/m, z19.d
+ // vl128 state = 0x84dd46d6
+ __ dci(0x651eb652); // flogb z18.d, p5/m, z18.d
+ // vl128 state = 0x2ea4a0df
+ __ dci(0x651cbe42); // flogb z2.s, p7/m, z18.s
+ // vl128 state = 0x8cdd1250
+ __ dci(0x651cb852); // flogb z18.s, p6/m, z2.s
+ // vl128 state = 0x5f5b051d
+ __ dci(0x651eb956); // flogb z22.d, p6/m, z10.d
+ // vl128 state = 0x7a17cdd1
+ __ dci(0x651eb11e); // flogb z30.d, p4/m, z8.d
+ // vl128 state = 0x7367f8ec
+ __ dci(0x651ab016); // flogb z22.h, p4/m, z0.h
+ // vl128 state = 0x8e1bfb06
+ __ dci(0x651ab014); // flogb z20.h, p4/m, z0.h
+ // vl128 state = 0x2bcfa0f0
+ __ dci(0x651aa81c); // flogb z28.h, p2/m, z0.h
+ // vl128 state = 0xeb9615e8
+ __ dci(0x651aa80c); // flogb z12.h, p2/m, z0.h
+ // vl128 state = 0x5b55f5cd
+ __ dci(0x651aa808); // flogb z8.h, p2/m, z0.h
+ // vl128 state = 0xdd1718f2
+ __ dci(0x651aa20a); // flogb z10.h, p0/m, z16.h
+ // vl128 state = 0x205e88ed
+ __ dci(0x651ab24e); // flogb z14.h, p4/m, z18.h
+ // vl128 state = 0x1c9f2035
+ __ dci(0x651ab36f); // flogb z15.h, p4/m, z27.h
+ // vl128 state = 0xea22efaf
+ __ dci(0x651ab36b); // flogb z11.h, p4/m, z27.h
+ // vl128 state = 0x0cd0b8cd
+ __ dci(0x651abb29); // flogb z9.h, p6/m, z25.h
+ // vl128 state = 0xa1a017d1
+ __ dci(0x651abb2d); // flogb z13.h, p6/m, z25.h
+ // vl128 state = 0x37d033d2
+ __ dci(0x651aba0c); // flogb z12.h, p6/m, z16.h
+ // vl128 state = 0x971bde83
+ __ dci(0x651cba1c); // flogb z28.s, p6/m, z16.s
+ // vl128 state = 0xb6b23bc2
+ __ dci(0x651cba1d); // flogb z29.s, p6/m, z16.s
+ // vl128 state = 0x1af298e0
+ __ dci(0x651cba15); // flogb z21.s, p6/m, z16.s
+ // vl128 state = 0x077a2869
+ }
+
+ // Hash the full simulated machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Golden hashes indexed by (Q-register lane count - 1): one entry per
+ // supported VL; entry 0 equals the final "vl128 state" above.
+ uint32_t expected_hashes[] = {
+ 0x077a2869,
+ 0xde5bc452,
+ 0xe80f0bc6,
+ 0x1c078cf2,
+ 0x66064034,
+ 0xa9f5264d,
+ 0xb19b24c1,
+ 0xb394864c,
+ 0x42991ea7,
+ 0xcf33094e,
+ 0xc4656d85,
+ 0x4cfa5b7e,
+ 0xbb7c121f,
+ 0xd2e8c839,
+ 0x028134cf,
+ 0x2f3e9779,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random encoding test for SVE2 floating-point pairwise operations
+// (FADDP/FMAXP/FMINP/FMAXNMP/FMINNMP, predicated/merging). Appears
+// auto-generated: each dci() emits a fixed 32-bit encoding inside an
+// ExactAssemblyScope; "vl128 state" comments record the expected
+// machine-state hash after each instruction at a 128-bit vector length.
+// Unlike most siblings, this test seeds the machine with kFpInputSet
+// (FP-oriented initial values).
+TEST_SVE(sve2_fp_pair) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm, kFpInputSet);
+ // state = 0x1e5cbcac
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x64d591aa); // fminnmp z10.d, p4/m, z10.d, z13.d
+ // vl128 state = 0x02a0f18c
+ __ dci(0x64d59dab); // fminnmp z11.d, p7/m, z11.d, z13.d
+ // vl128 state = 0xd6d0a87f
+ __ dci(0x64d59d7b); // fminnmp z27.d, p7/m, z27.d, z11.d
+ // vl128 state = 0x364f93b4
+ __ dci(0x64d59c2b); // fminnmp z11.d, p7/m, z11.d, z1.d
+ // vl128 state = 0xc7ed7476
+ __ dci(0x64d59f23); // fminnmp z3.d, p7/m, z3.d, z25.d
+ // vl128 state = 0x7a1ec868
+ __ dci(0x64d59f22); // fminnmp z2.d, p7/m, z2.d, z25.d
+ // vl128 state = 0x862a3f3d
+ __ dci(0x64d49fa0); // fmaxnmp z0.d, p7/m, z0.d, z29.d
+ // vl128 state = 0x11f71743
+ __ dci(0x64d49fa8); // fmaxnmp z8.d, p7/m, z8.d, z29.d
+ // vl128 state = 0x302e45cd
+ __ dci(0x64d49fa9); // fmaxnmp z9.d, p7/m, z9.d, z29.d
+ // vl128 state = 0x11cca180
+ __ dci(0x64d68fb9); // fmaxp z25.d, p3/m, z25.d, z29.d
+ // vl128 state = 0xee6b2d42
+ __ dci(0x64d68fb8); // fmaxp z24.d, p3/m, z24.d, z29.d
+ // vl128 state = 0x060efb2c
+ __ dci(0x64d49fba); // fmaxnmp z26.d, p7/m, z26.d, z29.d
+ // vl128 state = 0x4f4232ac
+ __ dci(0x649497b2); // fmaxnmp z18.s, p5/m, z18.s, z29.s
+ // vl128 state = 0xe3e04479
+ __ dci(0x649096b6); // faddp z22.s, p5/m, z22.s, z21.s
+ // vl128 state = 0x2a407146
+ __ dci(0x64909237); // faddp z23.s, p4/m, z23.s, z17.s
+ // vl128 state = 0x6d0b2bb8
+ __ dci(0x64d09027); // faddp z7.d, p4/m, z7.d, z1.d
+ // vl128 state = 0x5e7d175f
+ __ dci(0x64509006); // faddp z6.h, p4/m, z6.h, z0.h
+ // vl128 state = 0xa0a4cd20
+ __ dci(0x64d0940e); // faddp z14.d, p5/m, z14.d, z0.d
+ // vl128 state = 0xf66b9cde
+ __ dci(0x64d09c4f); // faddp z15.d, p7/m, z15.d, z2.d
+ // vl128 state = 0x5a2d08c9
+ __ dci(0x64d09c5f); // faddp z31.d, p7/m, z31.d, z2.d
+ // vl128 state = 0x2e390409
+ __ dci(0x64d09c57); // faddp z23.d, p7/m, z23.d, z2.d
+ // vl128 state = 0xfb4af476
+ __ dci(0x64d09c56); // faddp z22.d, p7/m, z22.d, z2.d
+ // vl128 state = 0x8d8c621b
+ __ dci(0x64d08e5e); // faddp z30.d, p3/m, z30.d, z18.d
+ // vl128 state = 0xba8962e6
+ __ dci(0x64d0845c); // faddp z28.d, p1/m, z28.d, z2.d
+ // vl128 state = 0x224654c6
+ __ dci(0x64d0845d); // faddp z29.d, p1/m, z29.d, z2.d
+ // vl128 state = 0xef608134
+ __ dci(0x64d08e4d); // faddp z13.d, p3/m, z13.d, z18.d
+ // vl128 state = 0x5adedbf3
+ __ dci(0x64908645); // faddp z5.s, p1/m, z5.s, z18.s
+ // vl128 state = 0x04b4f366
+ __ dci(0x64908a4d); // faddp z13.s, p2/m, z13.s, z18.s
+ // vl128 state = 0xf0a7482a
+ __ dci(0x64d08245); // faddp z5.d, p0/m, z5.d, z18.d
+ // vl128 state = 0x0f2ccd61
+ __ dci(0x64909255); // faddp z21.s, p4/m, z21.s, z18.s
+ // vl128 state = 0x7665491f
+ __ dci(0x649096c5); // faddp z5.s, p5/m, z5.s, z22.s
+ // vl128 state = 0xc3b53fd3
+ __ dci(0x649492c1); // fmaxnmp z1.s, p4/m, z1.s, z22.s
+ // vl128 state = 0x589fd64a
+ __ dci(0x649096d1); // faddp z17.s, p5/m, z17.s, z22.s
+ // vl128 state = 0x5a0d0d52
+ __ dci(0x649096d5); // faddp z21.s, p5/m, z21.s, z22.s
+ // vl128 state = 0xba57cd51
+ __ dci(0x649096d4); // faddp z20.s, p5/m, z20.s, z22.s
+ // vl128 state = 0xa5d7b29d
+ __ dci(0x649093d0); // faddp z16.s, p4/m, z16.s, z30.s
+ // vl128 state = 0xa62cce9e
+ __ dci(0x64909318); // faddp z24.s, p4/m, z24.s, z24.s
+ // vl128 state = 0x8cc209c7
+ __ dci(0x64909008); // faddp z8.s, p4/m, z8.s, z0.s
+ // vl128 state = 0x56a9af04
+ __ dci(0x64969000); // fmaxp z0.s, p4/m, z0.s, z0.s
+ // vl128 state = 0xc45f824a
+ __ dci(0x64569004); // fmaxp z4.h, p4/m, z4.h, z0.h
+ // vl128 state = 0x82da5cb7
+ __ dci(0x64569000); // fmaxp z0.h, p4/m, z0.h, z0.h
+ // vl128 state = 0xa9fff0bf
+ __ dci(0x64569001); // fmaxp z1.h, p4/m, z1.h, z0.h
+ // vl128 state = 0x71c2e09a
+ __ dci(0x64569605); // fmaxp z5.h, p5/m, z5.h, z16.h
+ // vl128 state = 0xe50c8b49
+ __ dci(0x64579624); // fminp z4.h, p5/m, z4.h, z17.h
+ // vl128 state = 0x4f3817cb
+ __ dci(0x6457962c); // fminp z12.h, p5/m, z12.h, z17.h
+ // vl128 state = 0x5a773e57
+ __ dci(0x64d5963c); // fminnmp z28.d, p5/m, z28.d, z17.d
+ // vl128 state = 0xa5c5e37c
+ __ dci(0x64d7943e); // fminp z30.d, p5/m, z30.d, z1.d
+ // vl128 state = 0xc778f8a3
+ __ dci(0x6457953a); // fminp z26.h, p5/m, z26.h, z9.h
+ // vl128 state = 0x01abc4af
+ __ dci(0x6457952a); // fminp z10.h, p5/m, z10.h, z9.h
+ // vl128 state = 0x45483a17
+ __ dci(0x64579d7a); // fminp z26.h, p7/m, z26.h, z11.h
+ // vl128 state = 0x355b08b3
+ }
+
+ // Hash the full simulated machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Golden hashes indexed by (Q-register lane count - 1): one entry per
+ // supported VL; entry 0 equals the final "vl128 state" above.
+ uint32_t expected_hashes[] = {
+ 0x355b08b3,
+ 0x8f7890cd,
+ 0x5dddb069,
+ 0x030a5f52,
+ 0xc569c150,
+ 0x060423ba,
+ 0x5d729bd0,
+ 0x079b4f8b,
+ 0x06e75e58,
+ 0x6f631884,
+ 0xddc735f0,
+ 0x7213b8e2,
+ 0x8cbf507c,
+ 0x40654268,
+ 0x3cd7ad6c,
+ 0xfba0ee9e,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random encoding test for SVE2 half-to-single widening FP
+// multiply-add/subtract, indexed form (FMLALB/FMLALT/FMLSLB/FMLSLT
+// z.s, z.h, z.h[idx]). Appears auto-generated: each dci() emits a fixed
+// 32-bit encoding inside an ExactAssemblyScope; "vl128 state" comments
+// record the expected machine-state hash after each instruction at a
+// 128-bit vector length.
+TEST_SVE(sve2_fmlal_fmlsl_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x64a94f15); // fmlalt z21.s, z24.h, z1.h[3]
+ // vl128 state = 0x0895849b
+ __ dci(0x64ab4f9d); // fmlalt z29.s, z28.h, z3.h[3]
+ // vl128 state = 0x6e0cf3fe
+ __ dci(0x64a74f9c); // fmlalt z28.s, z28.h, z7.h[1]
+ // vl128 state = 0x482b4f57
+ __ dci(0x64a74dde); // fmlalt z30.s, z14.h, z7.h[1]
+ // vl128 state = 0xf047791e
+ __ dci(0x64a74cee); // fmlalt z14.s, z7.h, z7.h[1]
+ // vl128 state = 0xde33332c
+ __ dci(0x64a648ef); // fmlalb z15.s, z7.h, z6.h[1]
+ // vl128 state = 0xf7148941
+ __ dci(0x64a648ee); // fmlalb z14.s, z7.h, z6.h[1]
+ // vl128 state = 0x69f23fcb
+ __ dci(0x64b649ea); // fmlalb z10.s, z15.h, z6.h[5]
+ // vl128 state = 0x979eea1a
+ __ dci(0x64b649ee); // fmlalb z14.s, z15.h, z6.h[5]
+ // vl128 state = 0x522917a9
+ __ dci(0x64b649e6); // fmlalb z6.s, z15.h, z6.h[5]
+ // vl128 state = 0x7d773525
+ __ dci(0x64b64ba2); // fmlalb z2.s, z29.h, z6.h[5]
+ // vl128 state = 0x220960c6
+ __ dci(0x64b46baa); // fmlslb z10.s, z29.h, z4.h[5]
+ // vl128 state = 0x2c8e384a
+ __ dci(0x64b46dab); // fmlslt z11.s, z13.h, z4.h[5]
+ // vl128 state = 0xa592cde1
+ __ dci(0x64b467bb); // fmlslt z27.s, z29.h, z4.h[4]
+ // vl128 state = 0xba31bd61
+ __ dci(0x64b665b3); // fmlslt z19.s, z13.h, z6.h[4]
+ // vl128 state = 0x75dade04
+ __ dci(0x64b663bb); // fmlslb z27.s, z29.h, z6.h[4]
+ // vl128 state = 0xa7358466
+ __ dci(0x64a662bf); // fmlslb z31.s, z21.h, z6.h[0]
+ // vl128 state = 0x6125ca9d
+ __ dci(0x64a7623e); // fmlslb z30.s, z17.h, z7.h[0]
+ // vl128 state = 0x4b1cda83
+ __ dci(0x64a7462e); // fmlalt z14.s, z17.h, z7.h[0]
+ // vl128 state = 0x00d73a44
+ __ dci(0x64a6662f); // fmlslt z15.s, z17.h, z6.h[0]
+ // vl128 state = 0xc5ea9f30
+ __ dci(0x64a666ed); // fmlslt z13.s, z23.h, z6.h[0]
+ // vl128 state = 0xe17ba118
+ __ dci(0x64a26eec); // fmlslt z12.s, z23.h, z2.h[1]
+ // vl128 state = 0xd1962c7a
+ __ dci(0x64a26cbc); // fmlslt z28.s, z5.h, z2.h[1]
+ // vl128 state = 0xde6f1ace
+ __ dci(0x64a26cb4); // fmlslt z20.s, z5.h, z2.h[1]
+ // vl128 state = 0x10d69920
+ __ dci(0x64a26cbc); // fmlslt z28.s, z5.h, z2.h[1]
+ // vl128 state = 0x8d190aec
+ __ dci(0x64a26cd8); // fmlslt z24.s, z6.h, z2.h[1]
+ // vl128 state = 0x432fdda3
+ __ dci(0x64a26c1a); // fmlslt z26.s, z0.h, z2.h[1]
+ // vl128 state = 0x9ababf0a
+ __ dci(0x64a24d1e); // fmlalt z30.s, z8.h, z2.h[1]
+ // vl128 state = 0x609040ae
+ __ dci(0x64a24d1c); // fmlalt z28.s, z8.h, z2.h[1]
+ // vl128 state = 0x0a047710
+ __ dci(0x64a24d1e); // fmlalt z30.s, z8.h, z2.h[1]
+ // vl128 state = 0xf273945a
+ __ dci(0x64a0490e); // fmlalb z14.s, z8.h, z0.h[1]
+ // vl128 state = 0x3a5456f1
+ __ dci(0x64a0490c); // fmlalb z12.s, z8.h, z0.h[1]
+ // vl128 state = 0xdb948daf
+ __ dci(0x64b04b04); // fmlalb z4.s, z24.h, z0.h[5]
+ // vl128 state = 0xd2eae2af
+ __ dci(0x64b04b06); // fmlalb z6.s, z24.h, z0.h[5]
+ // vl128 state = 0x26627a2c
+ __ dci(0x64b04b07); // fmlalb z7.s, z24.h, z0.h[5]
+ // vl128 state = 0x2841173d
+ __ dci(0x64b84b26); // fmlalb z6.s, z25.h, z0.h[7]
+ // vl128 state = 0x9b52bcc6
+ __ dci(0x64ba4f27); // fmlalt z7.s, z25.h, z2.h[7]
+ // vl128 state = 0x813bbabe
+ __ dci(0x64ba4923); // fmlalb z3.s, z9.h, z2.h[7]
+ // vl128 state = 0xbb608dad
+ __ dci(0x64b84d22); // fmlalt z2.s, z9.h, z0.h[7]
+ // vl128 state = 0xf4d84ed6
+ __ dci(0x64b84d23); // fmlalt z3.s, z9.h, z0.h[7]
+ // vl128 state = 0x1cc0784e
+ __ dci(0x64bc4527); // fmlalt z7.s, z9.h, z4.h[6]
+ // vl128 state = 0x4eece4b7
+ __ dci(0x64bc6737); // fmlslt z23.s, z25.h, z4.h[6]
+ // vl128 state = 0x00dacf34
+ __ dci(0x64bc6fa7); // fmlslt z7.s, z29.h, z4.h[7]
+ // vl128 state = 0x597e23d4
+ __ dci(0x64bc6e25); // fmlslt z5.s, z17.h, z4.h[7]
+ // vl128 state = 0xa66b843c
+ __ dci(0x64be6f2d); // fmlslt z13.s, z25.h, z6.h[7]
+ // vl128 state = 0xb595ec08
+ __ dci(0x64be6765); // fmlslt z5.s, z27.h, z6.h[6]
+ // vl128 state = 0xd6c3af0a
+ __ dci(0x64be662d); // fmlslt z13.s, z17.h, z6.h[6]
+ // vl128 state = 0x864f26a8
+ __ dci(0x64bf6225); // fmlslb z5.s, z17.h, z7.h[6]
+ // vl128 state = 0xb969be4d
+ __ dci(0x64bb626d); // fmlslb z13.s, z19.h, z3.h[6]
+ // vl128 state = 0x73329b58
+ __ dci(0x64b9622c); // fmlslb z12.s, z17.h, z1.h[6]
+ // vl128 state = 0xfb7e2da2
+ }
+
+ // Hash the full simulated machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Golden hashes indexed by (Q-register lane count - 1): one entry per
+ // supported VL; entry 0 equals the final "vl128 state" above.
+ uint32_t expected_hashes[] = {
+ 0xfb7e2da2,
+ 0x34ad546c,
+ 0xd914c0d4,
+ 0xc173287c,
+ 0x07db96b2,
+ 0xab5ece8c,
+ 0xcda13318,
+ 0x6e62dc3f,
+ 0x0268d9b4,
+ 0x15118567,
+ 0xf55fb24f,
+ 0xc4ab4b56,
+ 0x5911f225,
+ 0x6d9c320c,
+ 0xc69bdedf,
+ 0x1635a43f,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random encoding test for SVE2 floating-point conversion
+// instructions (FCVTNT/FCVTLT narrow/widen-to-odd-even and FCVTX/FCVTXNT
+// round-to-odd, all predicated/merging). Appears auto-generated: each dci()
+// emits a fixed 32-bit encoding inside an ExactAssemblyScope; "vl128 state"
+// comments record the expected machine-state hash after each instruction at
+// a 128-bit vector length.
+TEST_SVE(sve2_fp_convert) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x640ab3ee); // fcvtxnt z14.s, p4/m, z31.d
+ // vl128 state = 0x3ea71f7a
+ __ dci(0x64caa9e0); // fcvtnt z0.s, p2/m, z15.d
+ // vl128 state = 0xe9d750a1
+ __ dci(0x64cab83d); // fcvtnt z29.s, p6/m, z1.d
+ // vl128 state = 0x9ce43257
+ __ dci(0x650aad62); // fcvtx z2.s, p3/m, z11.d
+ // vl128 state = 0x60283e22
+ __ dci(0x64cbb42b); // fcvtlt z11.d, p5/m, z1.s
+ // vl128 state = 0xfbecbe4a
+ __ dci(0x6488ba54); // fcvtnt z20.h, p6/m, z18.s
+ // vl128 state = 0xbb81cc05
+ __ dci(0x64cbb730); // fcvtlt z16.d, p5/m, z25.s
+ // vl128 state = 0xd9cebdf5
+ __ dci(0x640aa5e4); // fcvtxnt z4.s, p1/m, z15.d
+ // vl128 state = 0x9dba64db
+ __ dci(0x650aa715); // fcvtx z21.s, p1/m, z24.d
+ // vl128 state = 0x0e68fab9
+ __ dci(0x64cabe86); // fcvtnt z6.s, p7/m, z20.d
+ // vl128 state = 0x5936ac32
+ __ dci(0x64cba075); // fcvtlt z21.d, p0/m, z3.s
+ // vl128 state = 0x2eb8a37b
+ __ dci(0x6488b3c5); // fcvtnt z5.h, p4/m, z30.s
+ // vl128 state = 0x9f471340
+ __ dci(0x6489b24a); // fcvtlt z10.s, p4/m, z18.h
+ // vl128 state = 0xcf5e5808
+ __ dci(0x64cbb514); // fcvtlt z20.d, p5/m, z8.s
+ // vl128 state = 0x870c5b85
+ __ dci(0x650ab090); // fcvtx z16.s, p4/m, z4.d
+ // vl128 state = 0x305da0a0
+ __ dci(0x64cbb2d3); // fcvtlt z19.d, p4/m, z22.s
+ // vl128 state = 0x8eb1b5fc
+ __ dci(0x64cbb093); // fcvtlt z19.d, p4/m, z4.s
+ // vl128 state = 0x3c070332
+ __ dci(0x6488b9b8); // fcvtnt z24.h, p6/m, z13.s
+ // vl128 state = 0xe0fc3455
+ __ dci(0x650aa64d); // fcvtx z13.s, p1/m, z18.d
+ // vl128 state = 0x65556c34
+ __ dci(0x6488b2d7); // fcvtnt z23.h, p4/m, z22.s
+ // vl128 state = 0xc9ccae47
+ __ dci(0x650ab36d); // fcvtx z13.s, p4/m, z27.d
+ // vl128 state = 0x31d942a1
+ __ dci(0x650aba2c); // fcvtx z12.s, p6/m, z17.d
+ // vl128 state = 0x27497e26
+ __ dci(0x650aa377); // fcvtx z23.s, p0/m, z27.d
+ // vl128 state = 0xbe0a7446
+ __ dci(0x6489a3a5); // fcvtlt z5.s, p0/m, z29.h
+ // vl128 state = 0x454c62cc
+ __ dci(0x64cabeb9); // fcvtnt z25.s, p7/m, z21.d
+ // vl128 state = 0x808a014f
+ __ dci(0x6489b4c2); // fcvtlt z2.s, p5/m, z6.h
+ // vl128 state = 0x55ae2250
+ __ dci(0x64cba246); // fcvtlt z6.d, p0/m, z18.s
+ // vl128 state = 0x7ce05c24
+ __ dci(0x650ab2a6); // fcvtx z6.s, p4/m, z21.d
+ // vl128 state = 0xa26121f5
+ __ dci(0x64cbb239); // fcvtlt z25.d, p4/m, z17.s
+ // vl128 state = 0xb40c58e1
+ __ dci(0x64cabdd9); // fcvtnt z25.s, p7/m, z14.d
+ // vl128 state = 0xf5077a54
+ __ dci(0x650ab75a); // fcvtx z26.s, p5/m, z26.d
+ // vl128 state = 0x95b006de
+ __ dci(0x650aa08b); // fcvtx z11.s, p0/m, z4.d
+ // vl128 state = 0x9ca5060c
+ __ dci(0x640aafd3); // fcvtxnt z19.s, p3/m, z30.d
+ // vl128 state = 0x85c89705
+ __ dci(0x64caaf3a); // fcvtnt z26.s, p3/m, z25.d
+ // vl128 state = 0x6b6aa4f9
+ __ dci(0x640abda1); // fcvtxnt z1.s, p7/m, z13.d
+ // vl128 state = 0x769cf76e
+ __ dci(0x6489a6f9); // fcvtlt z25.s, p1/m, z23.h
+ // vl128 state = 0x0a291b3b
+ __ dci(0x6489b38d); // fcvtlt z13.s, p4/m, z28.h
+ // vl128 state = 0x6b72e558
+ __ dci(0x650aaf63); // fcvtx z3.s, p3/m, z27.d
+ // vl128 state = 0xf4a004e0
+ __ dci(0x6488bfa4); // fcvtnt z4.h, p7/m, z29.s
+ // vl128 state = 0xe01c349e
+ __ dci(0x6489a6ee); // fcvtlt z14.s, p1/m, z23.h
+ // vl128 state = 0x3b06da53
+ __ dci(0x64cabbf8); // fcvtnt z24.s, p6/m, z31.d
+ // vl128 state = 0xc60fbbf0
+ __ dci(0x6489bc7f); // fcvtlt z31.s, p7/m, z3.h
+ // vl128 state = 0x8b281c78
+ __ dci(0x64caaf1f); // fcvtnt z31.s, p3/m, z24.d
+ // vl128 state = 0x0f17afbb
+ __ dci(0x650aac71); // fcvtx z17.s, p3/m, z3.d
+ // vl128 state = 0xce0ac3e1
+ __ dci(0x650aa1df); // fcvtx z31.s, p0/m, z14.d
+ // vl128 state = 0x71ba2085
+ __ dci(0x650aaf9f); // fcvtx z31.s, p3/m, z28.d
+ // vl128 state = 0xe42caea0
+ __ dci(0x640abff9); // fcvtxnt z25.s, p7/m, z31.d
+ // vl128 state = 0xec3c032c
+ __ dci(0x6489b8e5); // fcvtlt z5.s, p6/m, z7.h
+ // vl128 state = 0xe41850f7
+ __ dci(0x640aa1a1); // fcvtxnt z1.s, p0/m, z13.d
+ // vl128 state = 0xaf3944b4
+ __ dci(0x6488bf41); // fcvtnt z1.h, p7/m, z26.s
+ // vl128 state = 0xdffd02bd
+ }
+
+ // Hash the full simulated machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Golden hashes indexed by (Q-register lane count - 1): one entry per
+ // supported VL; entry 0 equals the final "vl128 state" above.
+ uint32_t expected_hashes[] = {
+ 0xdffd02bd,
+ 0x03d1f711,
+ 0x41cf3358,
+ 0xa351d0f6,
+ 0xffba25ff,
+ 0x14092947,
+ 0x26b194fe,
+ 0x42acd8a3,
+ 0xc0498960,
+ 0xcccf1171,
+ 0x8dca76ed,
+ 0xefbda194,
+ 0xcf04a23d,
+ 0x91e2629f,
+ 0xf05e8f52,
+ 0x4994ad4a,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Pseudo-random encoding test for SVE2 saturating rounding doubling
+// multiply-add/subtract high, indexed form (SQRDMLAH/SQRDMLSH
+// z, z, z[idx]). Appears auto-generated: each dci() emits a fixed 32-bit
+// encoding inside an ExactAssemblyScope (40 instructions here, unlike the
+// 50 used by sibling tests); "vl128 state" comments record the expected
+// machine-state hash after each instruction at a 128-bit vector length.
+TEST_SVE(sve2_saturating_multiply_add_high_indexed) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
+ __ dci(0x442b1100); // sqrdmlah z0.h, z8.h, z3.h[1]
+ // vl128 state = 0xb012d377
+ __ dci(0x44211108); // sqrdmlah z8.h, z8.h, z1.h[0]
+ // vl128 state = 0xae399e50
+ __ dci(0x4421110c); // sqrdmlah z12.h, z8.h, z1.h[0]
+ // vl128 state = 0x1a46b700
+ __ dci(0x44291188); // sqrdmlah z8.h, z12.h, z1.h[1]
+ // vl128 state = 0x7525090a
+ __ dci(0x442811a9); // sqrdmlah z9.h, z13.h, z0.h[1]
+ // vl128 state = 0xf2907eb8
+ __ dci(0x442c11eb); // sqrdmlah z11.h, z15.h, z4.h[1]
+ // vl128 state = 0x65a71d51
+ __ dci(0x442c11e3); // sqrdmlah z3.h, z15.h, z4.h[1]
+ // vl128 state = 0x8b30e19b
+ __ dci(0x442413e1); // sqrdmlah z1.h, z31.h, z4.h[0]
+ // vl128 state = 0x448e4c0f
+ __ dci(0x44a413a0); // sqrdmlah z0.s, z29.s, z4.s[0]
+ // vl128 state = 0x1745e0db
+ __ dci(0x44241321); // sqrdmlah z1.h, z25.h, z4.h[0]
+ // vl128 state = 0xe07b491b
+ __ dci(0x44a413a5); // sqrdmlah z5.s, z29.s, z4.s[0]
+ // vl128 state = 0xad39c91c
+ __ dci(0x44e41327); // sqrdmlah z7.d, z25.d, z4.d[0]
+ // vl128 state = 0xd327dc1c
+ __ dci(0x44e4132f); // sqrdmlah z15.d, z25.d, z4.d[0]
+ // vl128 state = 0x8da341ca
+ __ dci(0x44e5130b); // sqrdmlah z11.d, z24.d, z5.d[0]
+ // vl128 state = 0x4dbd3ee1
+ __ dci(0x44e3130a); // sqrdmlah z10.d, z24.d, z3.d[0]
+ // vl128 state = 0x71452896
+ __ dci(0x44e3131a); // sqrdmlah z26.d, z24.d, z3.d[0]
+ // vl128 state = 0x4d6d8b90
+ __ dci(0x4463135e); // sqrdmlah z30.h, z26.h, z3.h[4]
+ // vl128 state = 0x0b53f7b4
+ __ dci(0x44e7135c); // sqrdmlah z28.d, z26.d, z7.d[0]
+ // vl128 state = 0x78ab2bb9
+ __ dci(0x44e7134c); // sqrdmlah z12.d, z26.d, z7.d[0]
+ // vl128 state = 0x3773b9e2
+ __ dci(0x44e51144); // sqrdmlah z4.d, z10.d, z5.d[0]
+ // vl128 state = 0x8f8883da
+ __ dci(0x44e411c0); // sqrdmlah z0.d, z14.d, z4.d[0]
+ // vl128 state = 0xa27ef92f
+ __ dci(0x44ec15c4); // sqrdmlsh z4.d, z14.d, z12.d[0]
+ // vl128 state = 0x6cea3cee
+ __ dci(0x44ec14e0); // sqrdmlsh z0.d, z7.d, z12.d[0]
+ // vl128 state = 0xb5e40d5f
+ __ dci(0x44ee16f0); // sqrdmlsh z16.d, z23.d, z14.d[0]
+ // vl128 state = 0xacf903eb
+ __ dci(0x44ea16d4); // sqrdmlsh z20.d, z22.d, z10.d[0]
+ // vl128 state = 0x698246a6
+ __ dci(0x44ea16d0); // sqrdmlsh z16.d, z22.d, z10.d[0]
+ // vl128 state = 0x58015eeb
+ __ dci(0x44ea16d1); // sqrdmlsh z17.d, z22.d, z10.d[0]
+ // vl128 state = 0xdbf1d9a6
+ __ dci(0x44ab16d3); // sqrdmlsh z19.s, z22.s, z3.s[1]
+ // vl128 state = 0xbde312bb
+ __ dci(0x44aa17d1); // sqrdmlsh z17.s, z30.s, z2.s[1]
+ // vl128 state = 0xc033b9a1
+ __ dci(0x44aa1650); // sqrdmlsh z16.s, z18.s, z2.s[1]
+ // vl128 state = 0x0e3b4c59
+ __ dci(0x44aa1632); // sqrdmlsh z18.s, z17.s, z2.s[1]
+ // vl128 state = 0x6f849e01
+ __ dci(0x44aa1710); // sqrdmlsh z16.s, z24.s, z2.s[1]
+ // vl128 state = 0x701e7316
+ __ dci(0x44aa1711); // sqrdmlsh z17.s, z24.s, z2.s[1]
+ // vl128 state = 0xbfbc7895
+ __ dci(0x44a91715); // sqrdmlsh z21.s, z24.s, z1.s[1]
+ // vl128 state = 0x2307c6f3
+ __ dci(0x44a91697); // sqrdmlsh z23.s, z20.s, z1.s[1]
+ // vl128 state = 0x78db6627
+ __ dci(0x44a91696); // sqrdmlsh z22.s, z20.s, z1.s[1]
+ // vl128 state = 0x37d25a35
+ __ dci(0x44a816de); // sqrdmlsh z30.s, z22.s, z0.s[1]
+ // vl128 state = 0xf611db46
+ __ dci(0x44ab16dc); // sqrdmlsh z28.s, z22.s, z3.s[1]
+ // vl128 state = 0x699a840f
+ __ dci(0x44af165d); // sqrdmlsh z29.s, z18.s, z7.s[1]
+ // vl128 state = 0x0b5d451f
+ __ dci(0x44af16f5); // sqrdmlsh z21.s, z23.s, z7.s[1]
+ // vl128 state = 0xe49e3b59
+ }
+
+ // Hash the full simulated machine state and load the 32-bit result into w0.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Golden hashes indexed by (Q-register lane count - 1): one entry per
+ // supported VL; entry 0 equals the final "vl128 state" above.
+ uint32_t expected_hashes[] = {
+ 0xe49e3b59,
+ 0xce0062c7,
+ 0xf796ec27,
+ 0x1f952649,
+ 0x4e4354e6,
+ 0x90cb0c51,
+ 0xf0688aee,
+ 0xae9de352,
+ 0x652f0c0d,
+ 0x0000db74,
+ 0xdc23fff7,
+ 0x228c116c,
+ 0x8477dd7c,
+ 0x08377c46,
+ 0x6e05a40f,
+ 0x874126fb,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_sat_double_mul_high_index) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // The encodings and "vl128 state" hashes below are generated by the
+ // test-donkey tool (test/test-donkey.cc). Each state comment records the
+ // machine-state hash after executing that instruction with a 128-bit
+ // vector length. Do not edit these by hand.
+ {
+ ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
+ __ dci(0x447bf609); // sqrdmulh z9.h, z16.h, z3.h[7]
+ // vl128 state = 0xacad7d7c
+ __ dci(0x447bf601); // sqrdmulh z1.h, z16.h, z3.h[7]
+ // vl128 state = 0xd6a976fe
+ __ dci(0x447bf600); // sqrdmulh z0.h, z16.h, z3.h[7]
+ // vl128 state = 0x959d4287
+ __ dci(0x446bf710); // sqrdmulh z16.h, z24.h, z3.h[5]
+ // vl128 state = 0x88b70b0e
+ __ dci(0x446af612); // sqrdmulh z18.h, z16.h, z2.h[5]
+ // vl128 state = 0xea48068a
+ __ dci(0x442af636); // sqrdmulh z22.h, z17.h, z2.h[1]
+ // vl128 state = 0x22135bae
+ __ dci(0x442af626); // sqrdmulh z6.h, z17.h, z2.h[1]
+ // vl128 state = 0x1ed137a8
+ __ dci(0x442af624); // sqrdmulh z4.h, z17.h, z2.h[1]
+ // vl128 state = 0x37aa44d4
+ __ dci(0x4420f625); // sqrdmulh z5.h, z17.h, z0.h[0]
+ // vl128 state = 0x9747863a
+ __ dci(0x4460f604); // sqrdmulh z4.h, z16.h, z0.h[4]
+ // vl128 state = 0xf6487f4b
+ __ dci(0x4460f605); // sqrdmulh z5.h, z16.h, z0.h[4]
+ // vl128 state = 0xb85302a6
+ __ dci(0x4420f641); // sqrdmulh z1.h, z18.h, z0.h[0]
+ // vl128 state = 0xfc85ce98
+ __ dci(0x4424f669); // sqrdmulh z9.h, z19.h, z4.h[0]
+ // vl128 state = 0xf0b36dd3
+ __ dci(0x4460f668); // sqrdmulh z8.h, z19.h, z0.h[4]
+ // vl128 state = 0x227fe9fe
+ __ dci(0x4462f6f8); // sqrdmulh z24.h, z23.h, z2.h[4]
+ // vl128 state = 0x7f4d89ab
+ __ dci(0x4462f6f0); // sqrdmulh z16.h, z23.h, z2.h[4]
+ // vl128 state = 0x61520386
+ __ dci(0x4472f6d1); // sqrdmulh z17.h, z22.h, z2.h[6]
+ // vl128 state = 0x34d07c81
+ __ dci(0x4472f250); // sqdmulh z16.h, z18.h, z2.h[6]
+ // vl128 state = 0x74313b89
+ __ dci(0x44b2f254); // sqdmulh z20.s, z18.s, z2.s[2]
+ // vl128 state = 0x7acc9692
+ __ dci(0x44e2f250); // sqdmulh z16.d, z18.d, z2.d[0]
+ // vl128 state = 0x3a1f908e
+ __ dci(0x44e4f251); // sqdmulh z17.d, z18.d, z4.d[0]
+ // vl128 state = 0xd2ae3642
+ __ dci(0x44e0f650); // sqrdmulh z16.d, z18.d, z0.d[0]
+ // vl128 state = 0x74da2dcc
+ __ dci(0x44f8f640); // sqrdmulh z0.d, z18.d, z8.d[1]
+ // vl128 state = 0x0273639a
+ __ dci(0x44f9f742); // sqrdmulh z2.d, z26.d, z9.d[1]
+ // vl128 state = 0x9c5062c9
+ __ dci(0x44f9f7e6); // sqrdmulh z6.d, z31.d, z9.d[1]
+ // vl128 state = 0x095e8fd7
+ __ dci(0x44fdf7ae); // sqrdmulh z14.d, z29.d, z13.d[1]
+ // vl128 state = 0x4ab7c261
+ __ dci(0x44fdf7af); // sqrdmulh z15.d, z29.d, z13.d[1]
+ // vl128 state = 0x7913f02e
+ __ dci(0x44f9f7ed); // sqrdmulh z13.d, z31.d, z9.d[1]
+ // vl128 state = 0xbbffd120
+ __ dci(0x44f9f7e5); // sqrdmulh z5.d, z31.d, z9.d[1]
+ // vl128 state = 0xc9cc793f
+ __ dci(0x44f5f7e4); // sqrdmulh z4.d, z31.d, z5.d[1]
+ // vl128 state = 0xc7cc2e4b
+ __ dci(0x44e5f3e0); // sqdmulh z0.d, z31.d, z5.d[0]
+ // vl128 state = 0x8a4efda7
+ __ dci(0x44e4f364); // sqdmulh z4.d, z27.d, z4.d[0]
+ // vl128 state = 0xfa30239a
+ __ dci(0x44edf366); // sqdmulh z6.d, z27.d, z13.d[0]
+ // vl128 state = 0x9c538671
+ __ dci(0x44adf322); // sqdmulh z2.s, z25.s, z5.s[1]
+ // vl128 state = 0xafb03157
+ __ dci(0x44adf263); // sqdmulh z3.s, z19.s, z5.s[1]
+ // vl128 state = 0x6ea1e1ff
+ __ dci(0x44bdf22b); // sqdmulh z11.s, z17.s, z5.s[3]
+ // vl128 state = 0x0040a3a0
+ __ dci(0x44adf62a); // sqrdmulh z10.s, z17.s, z5.s[1]
+ // vl128 state = 0x8b3e6419
+ __ dci(0x44adf622); // sqrdmulh z2.s, z17.s, z5.s[1]
+ // vl128 state = 0x579bf738
+ __ dci(0x44abf632); // sqrdmulh z18.s, z17.s, z3.s[1]
+ // vl128 state = 0x2678c680
+ __ dci(0x44a9f6ba); // sqrdmulh z26.s, z21.s, z1.s[1]
+ // vl128 state = 0xee25a322
+ __ dci(0x44a9f6aa); // sqrdmulh z10.s, z21.s, z1.s[1]
+ // vl128 state = 0x99cfcf9f
+ __ dci(0x44b1f6ab); // sqrdmulh z11.s, z21.s, z1.s[2]
+ // vl128 state = 0xa6785a38
+ __ dci(0x44b1f0bb); // sqdmulh z27.s, z5.s, z1.s[2]
+ // vl128 state = 0xfc822233
+ __ dci(0x4439f0bf); // sqdmulh z31.h, z5.h, z1.h[3]
+ // vl128 state = 0x322d49df
+ __ dci(0x4433f0be); // sqdmulh z30.h, z5.h, z3.h[2]
+ // vl128 state = 0xbf6733d2
+ __ dci(0x4433f0d6); // sqdmulh z22.h, z6.h, z3.h[2]
+ // vl128 state = 0x99f11483
+ __ dci(0x4437f2d7); // sqdmulh z23.h, z22.h, z7.h[2]
+ // vl128 state = 0x9c146ede
+ __ dci(0x4426f2d6); // sqdmulh z22.h, z22.h, z6.h[0]
+ // vl128 state = 0xc089284f
+ __ dci(0x44a6f0de); // sqdmulh z30.s, z6.s, z6.s[0]
+ // vl128 state = 0xe962a269
+ __ dci(0x44a4f04e); // sqdmulh z14.s, z2.s, z4.s[0]
+ // vl128 state = 0xaea2f35e
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: index 0 is VL128, index 15 is
+ // VL2048 (indexed by the number of 128-bit chunks minus one).
+ uint32_t expected_hashes[] = {
+ 0xaea2f35e,
+ 0xb4e17c50,
+ 0x97dfb966,
+ 0x070d3c78,
+ 0x5b2f880d,
+ 0x8e643be0,
+ 0x4d7f006b,
+ 0xfbd08185,
+ 0x4960a97d,
+ 0x1e85903f,
+ 0x443b62e4,
+ 0xf196453a,
+ 0x50dae6ef,
+ 0x0e4bb245,
+ 0x69d661ab,
+ 0x7d6fb839,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(sve2_extract) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kSVE2,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ // The encodings and "vl128 state" hashes below are generated by the
+ // test-donkey tool (test/test-donkey.cc). Each state comment records the
+ // machine-state hash after executing that instruction with a 128-bit
+ // vector length. Do not edit these by hand.
+ {
+ ExactAssemblyScope scope(&masm, 60 * kInstructionSize);
+ __ dci(0x056a1008); // ext z8.b, {z0.b, z1.b}, #84
+ // vl128 state = 0x06ae6d5d
+ __ dci(0x05601418); // ext z24.b, {z0.b, z1.b}, #5
+ // vl128 state = 0x3b73c922
+ __ dci(0x05601708); // ext z8.b, {z24.b, z25.b}, #5
+ // vl128 state = 0xc3526a3d
+ __ dci(0x05601d0c); // ext z12.b, {z8.b, z9.b}, #7
+ // vl128 state = 0xbde17731
+ __ dci(0x05600c1c); // ext z28.b, {z0.b, z1.b}, #3
+ // vl128 state = 0x9ac72141
+ __ dci(0x05600c58); // ext z24.b, {z2.b, z3.b}, #3
+ // vl128 state = 0xccecefc0
+ __ dci(0x05600410); // ext z16.b, {z0.b, z1.b}, #1
+ // vl128 state = 0xe49d5f89
+ __ dci(0x05600438); // ext z24.b, {z1.b, z2.b}, #1
+ // vl128 state = 0x9967df9d
+ __ dci(0x0560067a); // ext z26.b, {z19.b, z20.b}, #1
+ // vl128 state = 0x110a8b46
+ __ dci(0x05601478); // ext z24.b, {z3.b, z4.b}, #5
+ // vl128 state = 0x558f95f2
+ __ dci(0x0560117c); // ext z28.b, {z11.b, z12.b}, #4
+ // vl128 state = 0x18d0f048
+ __ dci(0x0560157e); // ext z30.b, {z11.b, z12.b}, #5
+ // vl128 state = 0x1719547f
+ __ dci(0x05601c7a); // ext z26.b, {z3.b, z4.b}, #7
+ // vl128 state = 0x600cfa8a
+ __ dci(0x0560187e); // ext z30.b, {z3.b, z4.b}, #6
+ // vl128 state = 0xc93e431e
+ __ dci(0x05601876); // ext z22.b, {z3.b, z4.b}, #6
+ // vl128 state = 0x5be7af00
+ __ dci(0x05601c26); // ext z6.b, {z1.b, z2.b}, #7
+ // vl128 state = 0xd3d69d02
+ __ dci(0x05601c2e); // ext z14.b, {z1.b, z2.b}, #7
+ // vl128 state = 0x1d88c27b
+ __ dci(0x05601d3e); // ext z30.b, {z9.b, z10.b}, #7
+ // vl128 state = 0x56f91523
+ __ dci(0x05601dae); // ext z14.b, {z13.b, z14.b}, #7
+ // vl128 state = 0xbc175582
+ __ dci(0x056015ef); // ext z15.b, {z15.b, z16.b}, #5
+ // vl128 state = 0x9289a9ba
+ __ dci(0x0560157f); // ext z31.b, {z11.b, z12.b}, #5
+ // vl128 state = 0x46be3725
+ __ dci(0x0560157e); // ext z30.b, {z11.b, z12.b}, #5
+ // vl128 state = 0xa4fd59e9
+ __ dci(0x0560156e); // ext z14.b, {z11.b, z12.b}, #5
+ // vl128 state = 0x88b9ba85
+ __ dci(0x05601566); // ext z6.b, {z11.b, z12.b}, #5
+ // vl128 state = 0x7f3b2a36
+ __ dci(0x056017e4); // ext z4.b, {z31.b, z0.b}, #5
+ // vl128 state = 0xa71b8fa9
+ __ dci(0x05601f74); // ext z20.b, {z27.b, z28.b}, #7
+ // vl128 state = 0x89dcdeac
+ __ dci(0x05601f44); // ext z4.b, {z26.b, z27.b}, #7
+ // vl128 state = 0xa877313f
+ __ dci(0x05601e45); // ext z5.b, {z18.b, z19.b}, #7
+ // vl128 state = 0x6181834a
+ __ dci(0x05601255); // ext z21.b, {z18.b, z19.b}, #4
+ // vl128 state = 0x7c3595cd
+ __ dci(0x05701a51); // ext z17.b, {z18.b, z19.b}, #134
+ // vl128 state = 0x10fdfe4d
+ __ dci(0x05701ad3); // ext z19.b, {z22.b, z23.b}, #134
+ // vl128 state = 0x08e923c5
+ __ dci(0x05701ad1); // ext z17.b, {z22.b, z23.b}, #134
+ // vl128 state = 0xefb2c9e9
+ __ dci(0x05701b41); // ext z1.b, {z26.b, z27.b}, #134
+ // vl128 state = 0xd5dccda9
+ __ dci(0x05701b40); // ext z0.b, {z26.b, z27.b}, #134
+ // vl128 state = 0xd424c039
+ __ dci(0x05701bd0); // ext z16.b, {z30.b, z31.b}, #134
+ // vl128 state = 0xd914c077
+ __ dci(0x057013d8); // ext z24.b, {z30.b, z31.b}, #132
+ // vl128 state = 0x32459b3a
+ __ dci(0x05701259); // ext z25.b, {z18.b, z19.b}, #132
+ // vl128 state = 0x422ed7bf
+ __ dci(0x0570125d); // ext z29.b, {z18.b, z19.b}, #132
+ // vl128 state = 0x6bfc46ef
+ __ dci(0x05700215); // ext z21.b, {z16.b, z17.b}, #128
+ // vl128 state = 0xc53b85ed
+ __ dci(0x0560021d); // ext z29.b, {z16.b, z17.b}, #0
+ // vl128 state = 0xd391e5ec
+ __ dci(0x0570121c); // ext z28.b, {z16.b, z17.b}, #132
+ // vl128 state = 0x7990c1d7
+ __ dci(0x0570030c); // ext z12.b, {z24.b, z25.b}, #128
+ // vl128 state = 0xca0d3db8
+ __ dci(0x05700b88); // ext z8.b, {z28.b, z29.b}, #130
+ // vl128 state = 0xe5c71442
+ __ dci(0x05600b0c); // ext z12.b, {z24.b, z25.b}, #2
+ // vl128 state = 0x68510d62
+ __ dci(0x05600f1c); // ext z28.b, {z24.b, z25.b}, #3
+ // vl128 state = 0x77f9f046
+ __ dci(0x05600e14); // ext z20.b, {z16.b, z17.b}, #3
+ // vl128 state = 0x7068dedf
+ __ dci(0x05600604); // ext z4.b, {z16.b, z17.b}, #1
+ // vl128 state = 0x8b70c406
+ __ dci(0x05600406); // ext z6.b, {z0.b, z1.b}, #1
+ // vl128 state = 0x10e6b48c
+ __ dci(0x05600056); // ext z22.b, {z2.b, z3.b}, #0
+ // vl128 state = 0xe1294d7a
+ __ dci(0x05600052); // ext z18.b, {z2.b, z3.b}, #0
+ // vl128 state = 0x0762bbb0
+ __ dci(0x056000d6); // ext z22.b, {z6.b, z7.b}, #0
+ // vl128 state = 0x58be0ba4
+ __ dci(0x057008de); // ext z30.b, {z6.b, z7.b}, #130
+ // vl128 state = 0x8a2018e9
+ __ dci(0x0570085a); // ext z26.b, {z2.b, z3.b}, #130
+ // vl128 state = 0xb019b7e0
+ __ dci(0x057009d2); // ext z18.b, {z14.b, z15.b}, #130
+ // vl128 state = 0x9e6e14ed
+ __ dci(0x057008fa); // ext z26.b, {z7.b, z8.b}, #130
+ // vl128 state = 0x4cf64d22
+ __ dci(0x057008f2); // ext z18.b, {z7.b, z8.b}, #130
+ // vl128 state = 0x048c30f9
+ __ dci(0x057002f3); // ext z19.b, {z23.b, z24.b}, #128
+ // vl128 state = 0x2d7eb43b
+ __ dci(0x057006a3); // ext z3.b, {z21.b, z22.b}, #129
+ // vl128 state = 0xa37aeb5e
+ __ dci(0x05700687); // ext z7.b, {z20.b, z21.b}, #129
+ // vl128 state = 0xd8d7cdc7
+ __ dci(0x056006b7); // ext z23.b, {z21.b, z22.b}, #1
+ // vl128 state = 0x2480e1d4
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // One expected hash per vector length: index 0 is VL128, index 15 is
+ // VL2048 (indexed by the number of 128-bit chunks minus one).
+ uint32_t expected_hashes[] = {
+ 0x2480e1d4,
+ 0x4dc42cc5,
+ 0x7ac24121,
+ 0x9eaf5c98,
+ 0x1b7b35dc,
+ 0x1b1035fc,
+ 0xe15f6899,
+ 0xaad14717,
+ 0x3327c3fc,
+ 0x7f349408,
+ 0x2d865b00,
+ 0x9819cd29,
+ 0x7f64cace,
+ 0x3751e2c1,
+ 0x7e60fc24,
+ 0xc6b308fc,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+} // namespace aarch64
+} // namespace vixl
diff --git a/test/aarch64/test-trace-aarch64.cc b/test/aarch64/test-trace-aarch64.cc
index a0a2172f..27e71d57 100644
--- a/test/aarch64/test-trace-aarch64.cc
+++ b/test/aarch64/test-trace-aarch64.cc
@@ -2881,6 +2881,55 @@ static void GenerateTestSequenceSVE(MacroAssembler* masm) {
SVEMemOperand(x0, 4, SVE_MUL_VL));
}
+// Emit one of every LSE atomic memory instruction variant, operating on a
+// small scratch area claimed on the stack, for the trace/disassembler
+// golden-reference tests. The emitted instruction order is part of the
+// golden output; do not reorder.
+static void GenerateTestSequenceAtomics(MacroAssembler* masm) {
+ ExactAssemblyScope guard(masm,
+ masm->GetBuffer()->GetRemainingBytes(),
+ ExactAssemblyScope::kMaximumSize);
+ CPUFeaturesScope feature_guard(masm, CPUFeatures::kAtomics);
+ __ sub(sp, sp, 16); // Claim some working space on the stack.
+ __ mov(x0, 0x5555555555555555);
+ __ str(x0, MemOperand(sp)); // Initialise working space.
+
+// For a given operation OP, emit every load/store form: byte, halfword, word
+// and doubleword widths, with each acquire/release combination.
+#define INST_LIST(OP) \
+ __ ld##OP##b(w0, w0, MemOperand(sp)); \
+ __ ld##OP##ab(w0, w1, MemOperand(sp)); \
+ __ ld##OP##lb(w0, w2, MemOperand(sp)); \
+ __ ld##OP##alb(w0, w3, MemOperand(sp)); \
+ __ ld##OP##h(w0, w0, MemOperand(sp)); \
+ __ ld##OP##ah(w0, w1, MemOperand(sp)); \
+ __ ld##OP##lh(w0, w2, MemOperand(sp)); \
+ __ ld##OP##alh(w0, w3, MemOperand(sp)); \
+ __ ld##OP(w0, w0, MemOperand(sp)); \
+ __ ld##OP##a(w0, w1, MemOperand(sp)); \
+ __ ld##OP##l(w0, w2, MemOperand(sp)); \
+ __ ld##OP##al(w0, w3, MemOperand(sp)); \
+ __ ld##OP(x0, x0, MemOperand(sp)); \
+ __ ld##OP##a(x0, x1, MemOperand(sp)); \
+ __ ld##OP##l(x0, x2, MemOperand(sp)); \
+ __ ld##OP##al(x0, x3, MemOperand(sp)); \
+ __ st##OP##b(w0, MemOperand(sp)); \
+ __ st##OP##lb(w0, MemOperand(sp)); \
+ __ st##OP##h(w0, MemOperand(sp)); \
+ __ st##OP##lh(w0, MemOperand(sp)); \
+ __ st##OP(w0, MemOperand(sp)); \
+ __ st##OP##l(w0, MemOperand(sp)); \
+ __ st##OP(x0, MemOperand(sp)); \
+ __ st##OP##l(x0, MemOperand(sp));
+
+ INST_LIST(add);
+ INST_LIST(set);
+ INST_LIST(eor);
+ INST_LIST(smin);
+ INST_LIST(smax);
+ INST_LIST(umin);
+ INST_LIST(umax);
+ INST_LIST(clr);
+
+#undef INST_LIST
+
+ __ add(sp, sp, 16); // Restore stack pointer.
+}
+
static void MaskAddresses(const char* trace) {
#define VIXL_COLOUR "(\x1b\\[[01];([0-9][0-9])?m)?"
// All patterns are replaced with "$1~~~~~~~~~~~~~~~~".
@@ -3036,6 +3085,7 @@ static void TraceTestHelper(bool coloured_trace,
GenerateTestSequenceNEON(&masm);
GenerateTestSequenceNEONFP(&masm);
GenerateTestSequenceSVE(&masm);
+ GenerateTestSequenceAtomics(&masm);
masm.Ret();
masm.FinalizeCode();
@@ -3127,6 +3177,7 @@ static void PrintDisassemblerTestHelper(const char* prefix,
GenerateTestSequenceNEON(&masm);
GenerateTestSequenceNEONFP(&masm);
GenerateTestSequenceSVE(&masm);
+ GenerateTestSequenceAtomics(&masm);
masm.FinalizeCode();
Decoder decoder;
diff --git a/test/aarch64/test-utils-aarch64.cc b/test/aarch64/test-utils-aarch64.cc
index 5e6a3519..76e7eae0 100644
--- a/test/aarch64/test-utils-aarch64.cc
+++ b/test/aarch64/test-utils-aarch64.cc
@@ -30,6 +30,7 @@
#include "test-runner.h"
#include "test-utils-aarch64.h"
+#include "../test/aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
@@ -778,5 +779,240 @@ bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
#endif
}
+// Overlay pseudo-random floating-point values of a single element size onto
+// z0 - z31, seeded from `values` and derived via chained fused
+// multiply-adds so each register differs from the previous one.
+//
+// Note that the function assumes p0, p1, p2 and p3 are set to all true in b-,
+// h-, s- and d-lane sizes respectively, and p4, p5 are clobbered as temporary
+// predicates.
+template <typename T, size_t N>
+void SetFpData(MacroAssembler* masm,
+ int esize,
+ const T (&values)[N],
+ uint64_t lcg_mult) {
+ // a and b hold raw bit patterns for the constants used by the FMLA chain
+ // below, encoded for the requested element size.
+ uint64_t a = 0;
+ uint64_t b = lcg_mult;
+ // p4 selects only the element slots assigned to this floating-point type;
+ // p5 stays all-false and acts as the second interleave operand.
+ __ Pfalse(p5.VnB());
+ switch (esize) {
+ case kHRegSize:
+ a = Float16ToRawbits(Float16(1.5));
+ // Pick a convenient number within largest normal half-precision floating
+ // point.
+ b = Float16ToRawbits(Float16(lcg_mult % 1024));
+ // Step 1: Set fp16 numbers to the undefined registers.
+ // p4< 15:0>: 0b0101010101010101
+ // z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
+ __ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
+ break;
+ case kSRegSize:
+ a = FloatToRawbits(1.5);
+ b = FloatToRawbits(lcg_mult);
+ // Step 2: Set fp32 numbers to register on top of fp16 initialized.
+ // p4< 15:0>: 0b0000000100000001
+ // z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
+ __ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
+ break;
+ case kDRegSize:
+ a = DoubleToRawbits(1.5);
+ b = DoubleToRawbits(lcg_mult);
+ // Step 3: Set fp64 numbers to register on top of both fp16 and fp 32
+ // initialized.
+ // p4< 15:0>: 0b0000000000000001
+ // z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
+ __ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+
+ __ Dup(z30.WithLaneSize(esize), a);
+ __ Dup(z31.WithLaneSize(esize), b);
+
+ for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
+ // As floating point operations on random values have a tendency to
+ // converge on special-case numbers like NaNs, use normal floating point
+ // values as the seed instead.
+ InsrHelper(masm, z0.WithLaneSize(esize), values);
+ }
+
+ __ Fmla(z0.WithLaneSize(esize),
+ p4.Merging(),
+ z30.WithLaneSize(esize),
+ z0.WithLaneSize(esize),
+ z31.WithLaneSize(esize),
+ FastNaNPropagation);
+
+ // Chain the registers: each z[i] is derived from z[i-1] via a fused
+ // multiply-add with the constants in z30/z31, in the p4-selected lanes.
+ for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
+ __ Fmla(ZRegister(i).WithLaneSize(esize),
+ p4.Merging(),
+ z30.WithLaneSize(esize),
+ ZRegister(i - 1).WithLaneSize(esize),
+ z31.WithLaneSize(esize),
+ FastNaNPropagation);
+ }
+
+ // Finally derive z31 itself, so it isn't left holding the raw constant b.
+ __ Fmul(z31.WithLaneSize(esize),
+ p4.Merging(),
+ z31.WithLaneSize(esize),
+ z30.WithLaneSize(esize),
+ FastNaNPropagation);
+ __ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
+}
+
+// Set z0 - z31 to some normal floating point data.
+void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
+ // Initialise each Z register to a mixture of fp16/32/64 values in the
+ // following pattern:
+ // z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64 repeatedly throughout the
+ // register.
+ //
+ // For example:
+ // z{code}<2047:1920>: 0x{< fp64 >< fp32 ><fp16><fp16>}
+ // ...
+ // z{code}< 127: 0>: 0x{< fp64 >< fp32 ><fp16><fp16>}
+ //
+ // To produce the desired mixture, the three passes must run in this exact
+ // order; each pass overlays its own lanes on top of the previous one.
+ SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
+ SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
+ SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
+}
+
+// Initialise the machine to a predictable, non-zero state. Core registers
+// x0 - x30 are filled with an LCG sequence, NZCV is cleared, p0 - p3 are
+// set all-true for b-, h-, s- and d-lanes, z0 - z31 get pseudo-random
+// integer or floating-point data (selected by `input_set`), and the
+// remaining predicate registers are derived from the vector data.
+void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
+ // `input_set` is consumed below when choosing the vector data; the
+ // previous redundant USE(input_set) has been removed.
+ uint64_t lcg_mult = 6364136223846793005;
+
+ // Set x0 - x30 to pseudo-random data.
+ __ Mov(x29, 1); // LCG increment.
+ __ Mov(x30, lcg_mult);
+ __ Mov(x0, 42); // LCG seed.
+
+ __ Cmn(x0, 0); // Clear NZCV flags for later.
+
+ __ Madd(x0, x0, x30, x29); // First pseudo-random number.
+
+ // Registers 1 - 29.
+ for (unsigned i = 1; i < 30; i++) {
+ __ Madd(XRegister(i), XRegister(i - 1), x30, x29);
+ }
+ // Derive x30 itself, so it isn't left holding the raw multiplier.
+ __ Mul(x30, x29, x30);
+ __ Add(x30, x30, 1);
+
+
+ // Set first four predicate registers to true for increasing lane sizes.
+ __ Ptrue(p0.VnB());
+ __ Ptrue(p1.VnH());
+ __ Ptrue(p2.VnS());
+ __ Ptrue(p3.VnD());
+
+ // Set z0 - z31 to pseudo-random data.
+ if (input_set == kIntInputSet) {
+ __ Dup(z30.VnD(), 1);
+ __ Dup(z31.VnD(), lcg_mult);
+ __ Index(z0.VnB(), -16, 13); // LCG seeds.
+
+ // Chain the registers: each z[i] is derived from z[i-1].
+ __ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
+ for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
+ __ Mla(ZRegister(i).VnD(),
+ p0.Merging(),
+ z30.VnD(),
+ ZRegister(i - 1).VnD(),
+ z31.VnD());
+ }
+ __ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
+ __ Add(z31.VnD(), z31.VnD(), 1);
+
+ } else {
+ VIXL_ASSERT(input_set == kFpInputSet);
+ InitialiseRegisterFp(masm, lcg_mult);
+ }
+
+ // Set remaining predicate registers based on earlier pseudo-random data.
+ for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
+ __ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
+ }
+ for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
+ __ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
+ __ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
+ __ Mov(PRegister(i), p0);
+ }
+ __ Ptrue(p0.VnB());
+
+ // At this point, only sp and a few status registers are undefined. These
+ // must be ignored when computing the state hash.
+}
+
+// Emit code that computes a CRC32 hash of the machine state and stores it to
+// `dst`. The hash covers x0 - x30 (sp excluded), NZCV, every 128-bit chunk
+// of each Z register (so vector length affects the hash), and all predicate
+// registers. The clobbered core registers are preserved via the stack.
+void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
+ // Use explicit registers, to avoid hash order varying if
+ // UseScratchRegisterScope changes.
+ UseScratchRegisterScope temps(masm);
+ temps.ExcludeAll();
+ Register t0 = w0;
+ Register t1 = x1;
+
+ // Compute hash of x0 - x30.
+ // Save t0 and t1 first; they are restored by the final Pop.
+ __ Push(t0.X(), t1);
+ __ Crc32x(t0, wzr, t0.X());
+ for (unsigned i = 0; i < kNumberOfRegisters; i++) {
+ if (i == xzr.GetCode()) continue; // Skip sp.
+ if (t0.Is(WRegister(i))) continue; // Skip t0, as it's already hashed.
+ __ Crc32x(t0, t0, XRegister(i));
+ }
+
+ // Hash the status flags.
+ __ Mrs(t1, NZCV);
+ __ Crc32x(t0, t0, t1);
+
+ // Acquire another temp, as integer registers have been hashed already.
+ __ Push(x30, xzr);
+ Register t2 = x30;
+
+ // Compute hash of all bits in z0 - z31. This implies different hashes are
+ // produced for machines of different vector length.
+ for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
+ // t2 = vector length in bytes / 16 = number of 128-bit chunks.
+ __ Rdvl(t2, 1);
+ __ Lsr(t2, t2, 4);
+ Label vl_loop;
+ __ Bind(&vl_loop);
+ // Hash the low 128 bits, then rotate the Z register down by 16 bytes so
+ // every chunk is visited; after the last iteration the register has
+ // rotated all the way round to its original value.
+ __ Umov(t1, VRegister(i).V2D(), 0);
+ __ Crc32x(t0, t0, t1);
+ __ Umov(t1, VRegister(i).V2D(), 1);
+ __ Crc32x(t0, t0, t1);
+ __ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);
+ __ Sub(t2, t2, 1);
+ __ Cbnz(t2, &vl_loop);
+ }
+
+ // Hash predicate registers. For simplicity, this writes the predicate
+ // registers to a zero-initialised area of stack of the maximum size required
+ // for P registers. It then computes a hash of that entire stack area.
+ unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;
+
+ // Zero claimed stack area.
+ for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
+ __ Push(xzr, xzr);
+ }
+
+ // Store all P registers to the stack.
+ __ Mov(t1, sp);
+ for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
+ __ Str(PRegister(i), SVEMemOperand(t1));
+ __ Add(t1, t1, kPRegMaxSizeInBytes);
+ }
+
+ // Hash the entire stack area.
+ // Popping the whole area also restores sp to its pre-claim value.
+ for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
+ __ Pop(t1, t2);
+ __ Crc32x(t0, t0, t1);
+ __ Crc32x(t0, t0, t2);
+ }
+
+ // Store the result, then restore the clobbered registers.
+ __ Mov(t1, reinterpret_cast<uint64_t>(dst));
+ __ Str(t0, MemOperand(t1));
+
+ __ Pop(xzr, x30);
+ __ Pop(t1, t0.X());
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-utils-aarch64.h b/test/aarch64/test-utils-aarch64.h
index 554cd83f..b1c28986 100644
--- a/test/aarch64/test-utils-aarch64.h
+++ b/test/aarch64/test-utils-aarch64.h
@@ -583,6 +583,86 @@ bool CanRun(const CPUFeatures& required, bool* queried_can_run = NULL);
// we need to enable it in the infrastructure code for each test.
static const CPUFeatures kInfrastructureCPUFeatures(CPUFeatures::kNEON);
+// Selects which class of pseudo-random data SetInitialMachineState writes to
+// the vector registers: plain integer bit patterns, or normal floating-point
+// values (a mixture of fp16/fp32/fp64 lanes).
+enum InputSet {
+ kIntInputSet = 0,
+ kFpInputSet,
+};
+
+// Initialise CPU registers to a predictable, non-zero set of values. This
+// sets core, vector, predicate and flag registers, though leaves the stack
+// pointer at its original value.
+void SetInitialMachineState(MacroAssembler* masm,
+ InputSet input_set = kIntInputSet);
+
+// Compute a CRC32 hash of the machine state, and store it to dst. The hash
+// covers core (not sp), vector (lower 128 bits), predicate (lower 16 bits)
+// and flag registers.
+void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst);
+
+// The TEST_SVE macro works just like the usual TEST macro, but the resulting
+// function receives a `const Test& config` argument, to allow it to query the
+// vector length.
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+#define TEST_SVE_INNER(type, name) \
+ void Test##name(Test* config); \
+ Test* test_##name##_list[] = \
+ {Test::MakeSVETest(128, \
+ "AARCH64_" type "_" #name "_vl128", \
+ &Test##name), \
+ Test::MakeSVETest(384, \
+ "AARCH64_" type "_" #name "_vl384", \
+ &Test##name), \
+ Test::MakeSVETest(2048, \
+ "AARCH64_" type "_" #name "_vl2048", \
+ &Test##name)}; \
+ void Test##name(Test* config)
+
+#define SVE_SETUP_WITH_FEATURES(...) \
+ SETUP_WITH_FEATURES(__VA_ARGS__); \
+ simulator.SetVectorLengthInBits(config->sve_vl_in_bits())
+
+#else
+// Otherwise, just use whatever the hardware provides.
+static const int kSVEVectorLengthInBits =
+ CPUFeatures::InferFromOS().Has(CPUFeatures::kSVE)
+ ? CPU::ReadSVEVectorLengthInBits()
+ : kZRegMinSize;
+
+#define TEST_SVE_INNER(type, name) \
+ void Test##name(Test* config); \
+ Test* test_##name##_vlauto = \
+ Test::MakeSVETest(kSVEVectorLengthInBits, \
+ "AARCH64_" type "_" #name "_vlauto", \
+ &Test##name); \
+ void Test##name(Test* config)
+
+#define SVE_SETUP_WITH_FEATURES(...) \
+ SETUP_WITH_FEATURES(__VA_ARGS__); \
+ USE(config)
+
+#endif
+
+// Call masm->Insr repeatedly to allow test inputs to be set up concisely. This
+// is optimised for call-site clarity, not generated code quality, so it doesn't
+// exist in the MacroAssembler itself.
+//
+// Usage:
+//
+// int values[] = { 42, 43, 44 };
+// InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 }
+//
+// The rightmost (highest-indexed) array element maps to the lowest-numbered
+// lane.
+template <typename T, size_t N>
+void InsrHelper(MacroAssembler* masm,
+ const ZRegister& zdn,
+ const T (&values)[N]) {
+ // Insert the values left-to-right; each Insr shifts the register up one
+ // lane, so the last array element ends up in lane 0.
+ for (const T& value : values) {
+ masm->Insr(zdn, value);
+ }
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/test-donkey.cc b/test/test-donkey.cc
new file mode 100644
index 00000000..250fa5d3
--- /dev/null
+++ b/test/test-donkey.cc
@@ -0,0 +1,327 @@
+// Copyright 2020, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <regex>
+#include <set>
+
+#include "aarch64/test-utils-aarch64.h"
+
+using namespace vixl;
+using namespace vixl::aarch64;
+
+#define __ masm->
+
+// Decoder visitor that records the "form" metadata string of each decoded
+// instruction, so callers can match instruction forms against a regex.
+class InstructionReporter : public DecoderVisitor {
+ public:
+ InstructionReporter() : DecoderVisitor(kNonConstVisitor) {}
+
+ // Remember the form of the instruction just visited.
+ void Visit(Metadata *metadata, const Instruction *instr) VIXL_OVERRIDE {
+ USE(instr);
+ instr_form_ = (*metadata)["form"];
+ }
+
+ // Hand the last recorded form to the caller (leaves instr_form_ moved-from).
+ std::string MoveForm() { return std::move(instr_form_); }
+
+ private:
+ std::string instr_form_; // Form of the most recently visited instruction.
+};
+
+// Produce a pseudo-random variant of `base` by repeatedly flipping bits
+// until the result is both non-zero and different from `base`. Bit flips
+// accumulate across attempts. Randomness comes from lrand48(), so the
+// sequence is reproducible after srand48().
+Instr Mutate(Instr base) {
+ Instr mutated = base;
+ do {
+ // Flip two bits somewhere in the most-significant 27.
+ for (int bit = 0; bit < 2; bit++) {
+ uint32_t pos = 5 + ((lrand48() >> 20) % 27);
+ mutated ^= (1 << pos);
+ }
+
+ // Always flip one of the low five bits, as that's where the destination
+ // register is often encoded.
+ mutated ^= (1 << ((lrand48() >> 20) % 5));
+ } while ((mutated == base) || (mutated == 0));
+ return mutated;
+}
+
+#ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
+int main(void) {
+ printf("Test donkey requires a simulator build to be useful.\n");
+ return 0;
+}
+#else
+int main(int argc, char **argv) {
+ if ((argc < 3) || (argc > 5)) {
+ printf(
+ "Usage: test-donkey <instruction form regex> <number of instructions "
+ "to emit in test> <encoding generation manner> <input data type>\n"
+ " regex - ECMAScript (C++11) regular expression to match instruction "
+ "form\n"
+ " encoding=random - use rng only to select new instructions\n"
+ " (can take longer, but gives better coverage for disparate "
+ "encodings)\n"
+ " encoding=`initial hex` - hex encoding of first instruction in test, "
+ "eg. 1234abcd\n"
+ " input data type - used to specify the data type of generating "
+ "input, e.g. input=fp, default set to integer type\n"
+ " command examples :\n"
+ " ./test-donkey \"fml[as]l[bt]\" 50 encoding=random input=fp\n"
+ " ./test-donkey \"fml[as]l[bt]\" 30 input=int\n");
+ exit(1);
+ }
+
+ // Use LC-RNG only to select instructions.
+ bool random_only = false;
+
+ std::string target_re = argv[1];
+ uint32_t count = static_cast<uint32_t>(strtoul(argv[2], NULL, 10));
+ uint32_t cmdline_encoding = 0;
+ InputSet input_set = kIntInputSet;
+ if (argc > 3) {
+ // The arguments of instruction pattern and the number of generating
+ // instructions are processed.
+ int32_t i = 3;
+ std::string argv_s(argv[i]);
+ if (argv_s.find("encoding=") != std::string::npos) {
+ char *c = argv[i];
+ c += 9;
+ if (strcmp(c, "random") == 0) {
+ random_only = true;
+ } else {
+ cmdline_encoding = static_cast<uint32_t>(strtoul(c, NULL, 16));
+ }
+ i++;
+ }
+
+ if ((argc > 4) || (i == 3)) {
+ argv_s = std::string(argv[i]);
+ if (argv_s.find("input=") != std::string::npos) {
+ char *c = argv[i];
+ c += 6;
+ if (strcmp(c, "fp") == 0) {
+ input_set = kFpInputSet;
+ } else {
+ VIXL_ASSERT(strcmp(c, "int") == 0);
+ }
+ i++;
+ }
+ }
+
+ // Ensure all arguments have been processed.
+ VIXL_ASSERT(argc == i);
+ }
+
+ srand48(42);
+
+ MacroAssembler masm;
+ masm.GetCPUFeatures()->Combine(CPUFeatures::kSVE);
+
+ std::map<int, Simulator *> sim_vl;
+ for (int i = 128; i <= 2048; i += 128) {
+ sim_vl[i] = new Simulator(new Decoder());
+ sim_vl[i]->SetVectorLengthInBits(i);
+ }
+
+ char buffer[256];
+ Decoder trial_decoder;
+ Disassembler disasm(buffer, sizeof(buffer));
+ InstructionReporter reporter;
+ trial_decoder.AppendVisitor(&reporter);
+ trial_decoder.AppendVisitor(&disasm);
+
+ using InstrData = struct {
+ Instr inst;
+ std::string disasm;
+ uint32_t state_hash;
+ };
+ std::vector<InstrData> useful_insts;
+
+ // Seen states are only considered for vl128. It's assumed that a new state
+ // for vl128 implies a new state for all other vls.
+ std::set<uint32_t> seen_states;
+ uint32_t state_hash;
+
+ std::map<int, uint32_t> initial_state_vl;
+ std::map<int, uint32_t> state_hash_vl;
+
+ // Compute hash of the initial state of the machine.
+ Label test;
+ masm.Bind(&test);
+ masm.PushCalleeSavedRegisters();
+ SetInitialMachineState(&masm, input_set);
+ ComputeMachineStateHash(&masm, &state_hash);
+ masm.PopCalleeSavedRegisters();
+ masm.Ret();
+ masm.FinalizeCode();
+ masm.GetBuffer()->SetExecutable();
+
+ for (std::pair<int, Simulator *> s : sim_vl) {
+ s.second->RunFrom(masm.GetLabelAddress<Instruction *>(&test));
+ initial_state_vl[s.first] = state_hash;
+ if (s.first == 128) seen_states.insert(state_hash);
+ }
+
+ masm.GetBuffer()->SetWritable();
+ masm.Reset();
+
+ // Count number of failed instructions, in order to allow changing instruction
+ // candidate strategy.
+ int miss_count = 0;
+
+ while (useful_insts.size() < count) {
+ miss_count++;
+
+ Instr inst;
+ if (cmdline_encoding != 0) {
+ // Initial instruction encoding supplied on the command line.
+ inst = cmdline_encoding;
+ cmdline_encoding = 0;
+ } else if (useful_insts.empty() || random_only || (miss_count > 10000)) {
+ // LCG-random instruction.
+ inst = static_cast<Instr>(mrand48());
+ } else {
+ // Instruction based on mutation of last successful instruction.
+ inst = Mutate(useful_insts.back().inst);
+ }
+
+ trial_decoder.Decode(reinterpret_cast<Instruction *>(&inst));
+ if (std::regex_search(reporter.MoveForm(), std::regex(target_re))) {
+ // Disallow "unimplemented" instructions.
+ std::string buffer_s(buffer);
+ if (buffer_s.find("unimplemented") != std::string::npos) continue;
+
+ // Disallow instructions with "sp" in their arguments, as we don't support
+ // instructions operating on memory, and the OS expects sp to be valid for
+ // signal handlers, etc.
+ size_t space = buffer_s.find(' ');
+ if ((space != std::string::npos) &&
+ (buffer_s.substr(space).find("sp") != std::string::npos))
+ continue;
+
+ fprintf(stderr, "Trying 0x%08x (%s)\n", inst, buffer);
+
+ // TODO: factorise this code into a CalculateState helper function.
+
+ // Initialise the machine to a known state.
+ masm.PushCalleeSavedRegisters();
+ SetInitialMachineState(&masm, input_set);
+
+ {
+ ExactAssemblyScope scope(&masm,
+ (useful_insts.size() + 1) * kInstructionSize);
+
+ // Emit any instructions already found to move the state to somewhere
+ // new.
+ for (const InstrData &i : useful_insts) {
+ masm.dci(i.inst);
+ }
+
+ // Try a new instruction.
+ masm.dci(inst);
+ }
+
+ // Compute the new state of the machine.
+ ComputeMachineStateHash(&masm, &state_hash);
+ masm.PopCalleeSavedRegisters();
+ masm.Ret();
+ masm.FinalizeCode();
+ masm.GetBuffer()->SetExecutable();
+
+ // Try the new instruction for VL128.
+ sim_vl[128]->RunFrom(masm.GetLabelAddress<Instruction *>(&test));
+ state_hash_vl[128] = state_hash;
+
+ if (seen_states.count(state_hash_vl[128]) == 0) {
+ // A new state! Run for all VLs, record it, add the instruction to the
+ // list of useful ones.
+
+ for (std::pair<int, Simulator *> s : sim_vl) {
+ if (s.first == 128) continue;
+ s.second->RunFrom(masm.GetLabelAddress<Instruction *>(&test));
+ state_hash_vl[s.first] = state_hash;
+ }
+
+ seen_states.insert(state_hash_vl[128]);
+ useful_insts.push_back({inst, buffer, state_hash_vl[128]});
+ miss_count = 0;
+ } else {
+ // Machine already reached here. Probably not an interesting
+ // instruction. NB. it's possible for an instruction to reach the same
+ // machine state as two or more others, but for these purposes, let's
+ // call that not useful.
+ fprintf(stderr,
+ "Already reached state 0x%08x, skipping 0x%08x, miss_count "
+ "%d\n",
+ state_hash_vl[128],
+ inst,
+ miss_count);
+ }
+
+ // Restart generation.
+ masm.GetBuffer()->SetWritable();
+ masm.Reset();
+ }
+ }
+
+ // Emit test case based on identified instructions and associated hashes.
+ printf("TEST_SVE(sve2_%s) {\n", target_re.c_str());
+ printf(
+ " SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE, CPUFeatures::kSVE2, "
+ "CPUFeatures::kNEON, "
+ "CPUFeatures::kCRC32);\n");
+ printf(" START();\n\n");
+ printf((input_set == kFpInputSet)
+ ? " SetInitialMachineState(&masm, kFpInputSet);\n"
+ : " SetInitialMachineState(&masm);\n");
+ printf(" // state = 0x%08x\n\n", initial_state_vl[128]);
+
+ printf(" {\n");
+ printf(" ExactAssemblyScope scope(&masm, %lu * kInstructionSize);\n",
+ useful_insts.size());
+ for (InstrData &i : useful_insts) {
+ printf(" __ dci(0x%08x); // %s\n", i.inst, i.disasm.c_str());
+ printf(" // vl128 state = 0x%08x\n", i.state_hash);
+ }
+ printf(" }\n\n");
+ printf(" uint32_t state;\n");
+ printf(" ComputeMachineStateHash(&masm, &state);\n");
+ printf(" __ Mov(x0, reinterpret_cast<uint64_t>(&state));\n");
+ printf(" __ Ldr(w0, MemOperand(x0));\n\n");
+ printf(" END();\n");
+ printf(" if (CAN_RUN()) {\n");
+ printf(" RUN();\n");
+ printf(" uint32_t expected_hashes[] = {\n");
+ for (std::pair<int, uint32_t> h : state_hash_vl) {
+ printf(" 0x%08x,\n", h.second);
+ }
+ printf(" };\n");
+ printf(
+ " ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - "
+ "1], x0);\n");
+ printf(" }\n}\n");
+
+ return 0;
+}
+#endif
diff --git a/test/test-pool-manager.cc b/test/test-pool-manager.cc
index 0073d4f6..df2f32b9 100644
--- a/test/test-pool-manager.cc
+++ b/test/test-pool-manager.cc
@@ -421,9 +421,9 @@ TEST(FuzzObjectDeletedWhenPlaced) {
// Remove bound objects.
for (std::vector<TestObject *>::iterator iter = objects.begin();
iter != objects.end();) {
- TestObject *object = *iter;
- if (object->IsBound()) {
- delete object;
+ TestObject *obj = *iter;
+ if (obj->IsBound()) {
+ delete obj;
iter = objects.erase(iter);
} else {
++iter;
@@ -494,7 +494,7 @@ TEST(FuzzObjectUpdatedWhenPlaced) {
// Pick another random label to bind.
const int kProbabilityToBind = 20;
if ((Random() % 100) < kProbabilityToBind) {
- TestBranchObject *object = objects[RandomObjectID(objects.size())];
+ TestBranchObject *object2 = objects[RandomObjectID(objects.size())];
// Binding can cause the pool emission, so check if we need to emit
// the pools. The actual backends will know the max alignment we
// might need here, so can simplify the check (won't need to check
@@ -503,15 +503,15 @@ TEST(FuzzObjectUpdatedWhenPlaced) {
if (pool_manager.MustEmit(pc, max_padding)) {
pc = pool_manager.Emit(&masm, pc, max_padding);
}
- pc = pool_manager.Bind(&masm, object, pc);
+ pc = pool_manager.Bind(&masm, object2, pc);
}
// Remove bound objects.
for (std::vector<TestBranchObject *>::iterator iter = objects.begin();
iter != objects.end();) {
- TestBranchObject *object = *iter;
- if (object->IsBound()) {
- delete object;
+ TestBranchObject *obj = *iter;
+ if (obj->IsBound()) {
+ delete obj;
iter = objects.erase(iter);
} else {
++iter;
@@ -818,9 +818,9 @@ TEST(MustEmitNewReferenceDueToSizeOfObject) {
{
// If the object is smaller, we can emit the reference.
TestObject smaller_object(kBigObjectSize - 4, 1);
- ForwardReference<int32_t> temp_ref(pc, kBranchSize, pc, pc + kPoolSize);
+ ForwardReference<int32_t> temp_ref2(pc, kBranchSize, pc, pc + kPoolSize);
VIXL_ASSERT(
- !pool_manager.MustEmit(pc, kBranchSize, &temp_ref, &smaller_object));
+ !pool_manager.MustEmit(pc, kBranchSize, &temp_ref2, &smaller_object));
// If the reference is going to be added after the current objects in the
// pool, we can still emit it.
diff --git a/test/test-runner.h b/test/test-runner.h
index ffc8c2ad..bb72ce67 100644
--- a/test/test-runner.h
+++ b/test/test-runner.h
@@ -32,9 +32,12 @@
namespace vixl {
-// Each actual test is represented by a Test instance.
+// Each test is represented by a Test instance.
// Tests are appended to a static linked list upon creation.
class Test {
+ typedef void(TestFunction)();
+ typedef void(TestFunctionWithConfig)(Test* config);
+
public:
// Most tests require no per-test configuration, and so take no arguments. A
// few tests require dynamic configuration, and are passed a `Test` object.
@@ -52,6 +55,17 @@ class Test {
last_ = this;
}
+ static Test* MakeSVETest(int vl,
+ const char* name,
+ TestFunctionWithConfig* fn) {
+ // We never free this memory, but we need it to live for as long as the
+ // static
+ // linked list of tests, and this is the easiest way to do it.
+ Test* test = new Test(name, fn);
+ test->set_sve_vl_in_bits(vl);
+ return test;
+ }
+
const char* name() { return name_; }
void run();
@@ -98,9 +112,6 @@ class Test {
generate_test_trace_ = value;
}
- typedef void(TestFunction)();
- typedef void(TestFunctionWithConfig)(Test* config);
-
private:
const char* name_;
diff --git a/test/test-trace-reference/log-all b/test/test-trace-reference/log-all
index 7f4dc150..9e9904ab 100644
--- a/test/test-trace-reference/log-all
+++ b/test/test-trace-reference/log-all
@@ -1420,9 +1420,9 @@
0x~~~~~~~~~~~~~~~~ 9e42d90f scvtf d15, x8, #10
# d15: 0x0000000000000000
0x~~~~~~~~~~~~~~~~ 5e21d887 scvtf s7, s4
-# v7: 0x00000000000000007ff000004e81442e
+# v7: 0x0000000000000000000000004e81442e
0x~~~~~~~~~~~~~~~~ 5f32e5e8 scvtf s8, s15, #14
-# v8: 0x0000000000000000c004000000000000
+# v8: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 1e22015d scvtf s29, w10
# s29: 0x00000000
0x~~~~~~~~~~~~~~~~ 1e02d6af scvtf s15, w21, #11
@@ -1444,9 +1444,9 @@
0x~~~~~~~~~~~~~~~~ 9e4377db ucvtf d27, x30, #35
# d27: 0x0000000000000000
0x~~~~~~~~~~~~~~~~ 7e21d8ab ucvtf s11, s5
-# v11: 0x0000000000000000400000004f7fe000
+# v11: 0x0000000000000000000000004f7fe000
0x~~~~~~~~~~~~~~~~ 7f32e6e0 ucvtf s0, s23, #14
-# v0: 0x000000000000000043d21c00480a8294
+# v0: 0x000000000000000000000000480a8294
0x~~~~~~~~~~~~~~~~ 1e230274 ucvtf s20, w19
# s20: 0x00000000
0x~~~~~~~~~~~~~~~~ 1e03bad5 ucvtf s21, w22, #18
@@ -1456,9 +1456,9 @@
0x~~~~~~~~~~~~~~~~ 9e03ac47 ucvtf s7, x2, #21
# s7: 0x38ff0000
0x~~~~~~~~~~~~~~~~ 5ee0b813 abs d19, d0
-# v19: 0x000000000000000043d21c00480a8294
+# v19: 0x000000000000000000000000480a8294
0x~~~~~~~~~~~~~~~~ 4e20b970 abs v16.16b, v11.16b
-# v16: 0x0000000000000000400000004f7f2000
+# v16: 0x0000000000000000000000004f7f2000
0x~~~~~~~~~~~~~~~~ 4ee0bbe0 abs v0.2d, v31.2d
# v0: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0ea0bb3b abs v27.2s, v25.2s
@@ -1478,7 +1478,7 @@
0x~~~~~~~~~~~~~~~~ 4eee87ea add v10.2d, v31.2d, v14.2d
# v10: 0x495000018a83940168a6954c14cfd693
0x~~~~~~~~~~~~~~~~ 0eb385cf add v15.2s, v14.2s, v19.2s
-# v15: 0x00000000000000006328b14b89d7c527
+# v15: 0x00000000000000001f56954b89d7c527
0x~~~~~~~~~~~~~~~~ 0e7186fb add v27.4h, v23.4h, v17.4h
# v27: 0x0000000000000000495000000a029400
0x~~~~~~~~~~~~~~~~ 4ebd8799 add v25.4s, v28.4s, v29.4s
@@ -1488,7 +1488,7 @@
0x~~~~~~~~~~~~~~~~ 4e618444 add v4.8h, v2.8h, v1.8h
# v4: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0eaf41ca addhn v10.2s, v14.2d, v15.2d
-# v10: 0x000000000000000000000000827f4696
+# v10: 0x0000000000000000000000003ead2a96
0x~~~~~~~~~~~~~~~~ 0e7a43ca addhn v10.4h, v30.4s, v26.4s
# v10: 0x00000000000000000000000000003c7f
0x~~~~~~~~~~~~~~~~ 0e36419f addhn v31.8b, v12.8h, v22.8h
@@ -1500,15 +1500,15 @@
0x~~~~~~~~~~~~~~~~ 4e7140ff addhn2 v31.8h, v7.4s, v17.4s
# v31: 0x000000000000b87f000000000000ffff
0x~~~~~~~~~~~~~~~~ 5ef1ba6e addp d14, v19.2d
-# v14: 0x000000000000000043d21c00480a8294
+# v14: 0x000000000000000000000000480a8294
0x~~~~~~~~~~~~~~~~ 4e3cbd03 addp v3.16b, v8.16b, v28.16b
-# v3: 0x0000000020febf0000000000c4000000
+# v3: 0x0000000020febf000000000000000000
0x~~~~~~~~~~~~~~~~ 4ef1bca8 addp v8.2d, v5.2d, v17.2d
# v8: 0x000000007f8000003effffffffe00000
0x~~~~~~~~~~~~~~~~ 0ebabfd6 addp v22.2s, v30.2s, v26.2s
# v22: 0x00000000000000003c7fffff00000000
0x~~~~~~~~~~~~~~~~ 0e6ebf1d addp v29.4h, v24.4h, v14.4h
-# v29: 0x00000000000000005fd2ca9e00000000
+# v29: 0x00000000000000000000ca9e00000000
0x~~~~~~~~~~~~~~~~ 4eb8bf5e addp v30.4s, v26.4s, v24.4s
# v30: 0x0000000000000000000000003c7fffff
0x~~~~~~~~~~~~~~~~ 0e27bf4c addp v12.8b, v26.8b, v7.8b
@@ -1522,7 +1522,7 @@
0x~~~~~~~~~~~~~~~~ 0e71bbdb addv h27, v30.4h
# v27: 0x00000000000000000000000000003c7e
0x~~~~~~~~~~~~~~~~ 4e71b9d3 addv h19, v14.8h
-# v19: 0x00000000000000000000000000002a70
+# v19: 0x0000000000000000000000000000ca9e
0x~~~~~~~~~~~~~~~~ 4eb1bb6e addv s14, v27.4s
# v14: 0x00000000000000000000000000003c7e
0x~~~~~~~~~~~~~~~~ 4e3b1d0a and v10.16b, v8.16b, v27.16b
@@ -1530,13 +1530,13 @@
0x~~~~~~~~~~~~~~~~ 0e301c25 and v5.8b, v1.8b, v16.8b
# v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e781c7a bic v26.16b, v3.16b, v24.16b
-# v26: 0x0000000020febf0000000000c4000000
+# v26: 0x0000000020febf000000000000000000
0x~~~~~~~~~~~~~~~~ 2f075487 bic v7.2s, #0xe4, lsl #16
# v7: 0x000000000000000000000000381b0000
0x~~~~~~~~~~~~~~~~ 2f01b47c bic v28.4h, #0x23, lsl #8
# v28: 0x000000000000000040dfdcffdcc00000
0x~~~~~~~~~~~~~~~~ 6f05159d bic v29.4s, #0xac, lsl #0
-# v29: 0x00000000000000005fd2ca1200000000
+# v29: 0x00000000000000000000ca1200000000
0x~~~~~~~~~~~~~~~~ 0e751fec bic v12.8b, v31.8b, v21.8b
# v12: 0x0000000000000000000000000000ffff
0x~~~~~~~~~~~~~~~~ 6f049712 bic v18.8h, #0x98, lsl #0
@@ -1546,13 +1546,13 @@
0x~~~~~~~~~~~~~~~~ 2efb1ee2 bif v2.8b, v23.8b, v27.8b
# v2: 0x0000000000000000495000008a828000
0x~~~~~~~~~~~~~~~~ 6ead1c68 bit v8.16b, v3.16b, v13.16b
-# v8: 0x000000007f8000003effffffcc000000
+# v8: 0x000000007f8000003effffffc8000000
0x~~~~~~~~~~~~~~~~ 2eb71ca5 bit v5.8b, v5.8b, v23.8b
# v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e771fe9 bsl v9.16b, v31.16b, v23.16b
# v9: 0x0100000008009801010000000800dc00
0x~~~~~~~~~~~~~~~~ 2e631cee bsl v14.8b, v7.8b, v3.8b
-# v14: 0x000000000000000000000000c4000000
+# v14: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e2048bd cls v29.16b, v5.16b
# v29: 0x07070707070707070707070707070707
0x~~~~~~~~~~~~~~~~ 0ea04815 cls v21.2s, v0.2s
@@ -1564,7 +1564,7 @@
0x~~~~~~~~~~~~~~~~ 0e204893 cls v19.8b, v4.8b
# v19: 0x00000000000000000707070707070707
0x~~~~~~~~~~~~~~~~ 4e6049cf cls v15.8h, v14.8h
-# v15: 0x000f000f000f000f000f000f0001000f
+# v15: 0x000f000f000f000f000f000f000f000f
0x~~~~~~~~~~~~~~~~ 6e204881 clz v1.16b, v4.16b
# v1: 0x08080808080808080808080808080808
0x~~~~~~~~~~~~~~~~ 2ea04a3b clz v27.2s, v17.2s
@@ -1572,17 +1572,17 @@
0x~~~~~~~~~~~~~~~~ 2e604929 clz v9.4h, v9.4h
# v9: 0x00000000000000000007001000040000
0x~~~~~~~~~~~~~~~~ 6ea049ff clz v31.4s, v15.4s
-# v31: 0x0000000c0000000c0000000c0000000f
+# v31: 0x0000000c0000000c0000000c0000000c
0x~~~~~~~~~~~~~~~~ 2e204a6e clz v14.8b, v19.8b
# v14: 0x00000000000000000505050505050505
0x~~~~~~~~~~~~~~~~ 6e604966 clz v6.8h, v11.8h
-# v6: 0x00100010001000100001001000010000
+# v6: 0x00100010001000100010001000010000
0x~~~~~~~~~~~~~~~~ 7efd8cb2 cmeq d18, d5, d29
# v18: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5ee09bee cmeq d14, d31, #0
# v14: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e368c73 cmeq v19.16b, v3.16b, v22.16b
-# v19: 0xffffffff000000ff0000000000ffffff
+# v19: 0xffffffff000000ff00000000ffffffff
0x~~~~~~~~~~~~~~~~ 4e20992f cmeq v15.16b, v9.16b, #0
# v15: 0xffffffffffffffffff00ff00ff00ffff
0x~~~~~~~~~~~~~~~~ 6eea8e0c cmeq v12.2d, v16.2d, v10.2d
@@ -1622,15 +1622,15 @@
0x~~~~~~~~~~~~~~~~ 6ee08ae6 cmge v6.2d, v23.2d, #0
# v6: 0xffffffffffffffffffffffffffffffff
0x~~~~~~~~~~~~~~~~ 0ea33ed9 cmge v25.2s, v22.2s, v3.2s
-# v25: 0x000000000000000000000000ffffffff
+# v25: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 2ea08975 cmge v21.2s, v11.2s, #0
# v21: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 0e6c3c70 cmge v16.4h, v3.4h, v12.4h
-# v16: 0x0000000000000000ffffffff0000ffff
+# v16: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 2e608937 cmge v23.4h, v9.4h, #0
# v23: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 4eab3c47 cmge v7.4s, v2.4s, v11.4s
-# v7: 0xffffffffffffffff0000000000000000
+# v7: 0xffffffffffffffffffffffff00000000
0x~~~~~~~~~~~~~~~~ 6ea08ac0 cmge v0.4s, v22.4s, #0
# v0: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e293fca cmge v10.8b, v30.8b, v9.8b
@@ -1662,11 +1662,11 @@
0x~~~~~~~~~~~~~~~~ 0e608876 cmgt v22.4h, v3.4h, #0
# v22: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4ebb3565 cmgt v5.4s, v11.4s, v27.4s
-# v5: 0x0000000000000000ffffffffffffffff
+# v5: 0x000000000000000000000000ffffffff
0x~~~~~~~~~~~~~~~~ 4ea08a8d cmgt v13.4s, v20.4s, #0
# v13: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e2737fb cmgt v27.8b, v31.8b, v7.8b
-# v27: 0x0000000000000000000000ff000000ff
+# v27: 0x0000000000000000ffffffff000000ff
0x~~~~~~~~~~~~~~~~ 0e208805 cmgt v5.8b, v0.8b, #0
# v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e6d3796 cmgt v22.8h, v28.8h, v13.8h
@@ -1688,7 +1688,7 @@
0x~~~~~~~~~~~~~~~~ 2e3c3707 cmhi v7.8b, v24.8b, v28.8b
# v7: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e79354b cmhi v11.8h, v10.8h, v25.8h
-# v11: 0x0000000000000000ffffffff00000000
+# v11: 0x0000000000000000ffffffffffff0000
0x~~~~~~~~~~~~~~~~ 7ef13d81 cmhs d1, d12, d17
# v1: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e3e3f35 cmhs v21.16b, v25.16b, v30.16b
@@ -1730,11 +1730,11 @@
0x~~~~~~~~~~~~~~~~ 0ea0ab99 cmlt v25.2s, v28.2s, #0
# v25: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e60a960 cmlt v0.4h, v11.4h, #0
-# v0: 0x0000000000000000ffffffff00000000
+# v0: 0x0000000000000000ffffffffffff0000
0x~~~~~~~~~~~~~~~~ 4ea0a8b8 cmlt v24.4s, v5.4s, #0
# v24: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e20a97a cmlt v26.8b, v11.8b, #0
-# v26: 0x0000000000000000ffffffff00000000
+# v26: 0x0000000000000000ffffffffffff0000
0x~~~~~~~~~~~~~~~~ 4e60aaa1 cmlt v1.8h, v21.8h, #0
# v1: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 5efe8efc cmtst d28, d23, d30
@@ -9708,7 +9708,7 @@
0x~~~~~~~~~~~~~~~~ 0e61682b fcvtn v11.2s, v1.2d
# v11: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e216848 fcvtn v8.4h, v2.4s
-# v8: 0x37a00000000000000000000000000000
+# v8: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e616bb8 fcvtn2 v24.4s, v29.2d
# v24: 0x0000000000000000377f0000377f0000
0x~~~~~~~~~~~~~~~~ 4e216944 fcvtn2 v4.8h, v10.4s
@@ -9766,41 +9766,41 @@
0x~~~~~~~~~~~~~~~~ 6f2efed3 fcvtzu v19.4s, v22.4s, #18
# v19: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e6ffd0f fdiv v15.2d, v8.2d, v15.2d
-# v15: 0x7ff00000000000007ff8000000000000
+# v15: 0x7ff80000000000007ff8000000000000
0x~~~~~~~~~~~~~~~~ 2e3afd2c fdiv v12.2s, v9.2s, v26.2s
# v12: 0x00000000000000007fc000007fc00000
0x~~~~~~~~~~~~~~~~ 6e33fed3 fdiv v19.4s, v22.4s, v19.4s
# v19: 0xffffffffffffffffffffffffffffffff
0x~~~~~~~~~~~~~~~~ 4e68f4f3 fmax v19.2d, v7.2d, v8.2d
-# v19: 0x37a00000000000000000000000000000
+# v19: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e3df599 fmax v25.2s, v12.2s, v29.2s
# v25: 0x00000000000000007fc000007fc00000
0x~~~~~~~~~~~~~~~~ 4e25f5e6 fmax v6.4s, v15.4s, v5.4s
-# v6: 0x7ff00000000000007ff8000000000000
+# v6: 0x7ff80000000000007ff8000000000000
0x~~~~~~~~~~~~~~~~ 4e74c510 fmaxnm v16.2d, v8.2d, v20.2d
-# v16: 0x37a0000000000000000000000180fe00
+# v16: 0x0000000000000000000000000180fe00
0x~~~~~~~~~~~~~~~~ 0e39c74f fmaxnm v15.2s, v26.2s, v25.2s
# v15: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e30c5d7 fmaxnm v23.4s, v14.4s, v16.4s
-# v23: 0x37a0000000000000000000000180fe00
+# v23: 0x0000000000000000000000000180fe00
0x~~~~~~~~~~~~~~~~ 7e70ca66 fmaxnmp d6, v19.2d
-# v6: 0x000000000000000037a0000000000000
+# v6: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 7e30cb5b fmaxnmp s27, v26.2s
# v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e77c588 fmaxnmp v8.2d, v12.2d, v23.2d
-# v8: 0x37a00000000000007fc000007fc00000
+# v8: 0x000000000180fe007fc000007fc00000
0x~~~~~~~~~~~~~~~~ 2e36c72d fmaxnmp v13.2s, v25.2s, v22.2s
# v13: 0x0000000000000000ffffffff7fc00000
0x~~~~~~~~~~~~~~~~ 6e31c56f fmaxnmp v15.4s, v11.4s, v17.4s
# v15: 0xffffffff000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e30ca7b fmaxnmv s27, v19.4s
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 7e70f9d4 fmaxp d20, v14.2d
# v20: 0x0000000000000000ffffffff00000000
0x~~~~~~~~~~~~~~~~ 7e30f852 fmaxp s18, v2.2s
# v18: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e7ff6e9 fmaxp v9.2d, v23.2d, v31.2d
-# v9: 0x00000000ffffffff37a0000000000000
+# v9: 0x00000000ffffffff000000000180fe00
0x~~~~~~~~~~~~~~~~ 2e3ff6c7 fmaxp v7.2s, v22.2s, v31.2s
# v7: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 6e3df4f2 fmaxp v18.4s, v7.4s, v29.4s
@@ -9843,7 +9843,7 @@
# v0: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s
# v25: 0x0000000000000000000000007fc00000
-0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1]
# v23: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0]
# v23: 0x00000000000000000000000000000000
@@ -9858,9 +9858,9 @@
0x~~~~~~~~~~~~~~~~ 4e2bcd70 fmla v16.4s, v11.4s, v11.4s
# v16: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4f891afb fmla v27.4s, v23.4s, v9.s[2]
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5fc653db fmls d27, d30, v6.d[0]
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5f825215 fmls s21, s16, v2.s[0]
# v21: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4ef5ce65 fmls v5.2d, v19.2d, v21.2d
@@ -9872,7 +9872,7 @@
0x~~~~~~~~~~~~~~~~ 0fab5243 fmls v3.2s, v18.2s, v11.s[1]
# v3: 0x0000000000000000000000007fffffff
0x~~~~~~~~~~~~~~~~ 4ebeccbb fmls v27.4s, v5.4s, v30.4s
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4fa45a9a fmls v26.4s, v20.4s, v4.s[3]
# v26: 0x00000000000000007fffffff00000000
0x~~~~~~~~~~~~~~~~ 6f06f6ce fmov v14.2d, #0xd6 (-0.3438)
@@ -9885,13 +9885,13 @@
# v28: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1]
# x18: 0x0000000000000000
-0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1]
# v12: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3]
# v30: 0x000000000000000000000000ffffffff
0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d
# v25: 0x00000000000000000000000000000000
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2]
+0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1]
# v10: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s
# v7: 0x00000000000000000000000000000000
@@ -9901,7 +9901,7 @@
# v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0]
# v11: 0x7fc000007fc000007fc000007fffffff
-0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2]
+0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1]
# v28: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1]
# v25: 0x00000000000000000000000000000000
@@ -9928,7 +9928,7 @@
0x~~~~~~~~~~~~~~~~ 0ea1daca frecpe v10.2s, v22.2s
# v10: 0x00000000000000007f8000007f800000
0x~~~~~~~~~~~~~~~~ 4ea1d8c5 frecpe v5.4s, v6.4s
-# v5: 0x7f8000007f800000474c80007f800000
+# v5: 0x7f8000007f8000007f8000007f800000
0x~~~~~~~~~~~~~~~~ 4e7afcf6 frecps v22.2d, v7.2d, v26.2d
# v22: 0x40000000000000004000000000000000
0x~~~~~~~~~~~~~~~~ 0e22ff7f frecps v31.2s, v27.2s, v2.2s
@@ -10020,7 +10020,7 @@
0x~~~~~~~~~~~~~~~~ 2e21d88b ucvtf v11.2s, v4.2s
# v11: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 2f27e47d ucvtf v29.2s, v3.2s, #25
-# v29: 0x7fc000007fc000000000000000000000
+# v29: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e21daf6 ucvtf v22.4s, v23.4s
# v22: 0x4effe000000000004e001a4000000000
0x~~~~~~~~~~~~~~~~ 6f27e532 ucvtf v18.4s, v9.4s, #25
@@ -11173,10 +11173,10 @@
0x~~~~~~~~~~~~~~~~ e551ec06 st3w {z6.s, z7.s, z8.s}, p3, [x0, #3, mul vl]
# z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
# z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
-# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000)
+# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000)
# ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
# z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
# z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
@@ -11198,10 +11198,10 @@
# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ e5c27007 st3d {z7.d, z8.d, z9.d}, p4, [x0, x2, lsl #3]
# z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000)
+# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000)
# z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -11223,9 +11223,9 @@
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ a441f418 ld3b {z24.b, z25.b, z26.b}, p5/z, [x0, #3, mul vl]
-# z24<127:0>: 0x0000000000000000000000ff00000000
-# z25<127:0>: 0xa000000000000000000000ffc0000000
-# z26<127:0>: 0x3700000000000000000000007f000000
+# z24<127:0>: 0x00000000fe000000000000ff00000000
+# z25<127:0>: 0x0000000080000000000000ffc0000000
+# z26<127:0>: 0x0000000001000000000000007f000000
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
@@ -11236,10 +11236,10 @@
# ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
+# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~
+# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# z24<255:128>: 0x00000000000000000000000000000000
# z25<255:128>: 0x00000000000000000000000000000000
# z26<255:128>: 0x00000000000000000000000000000000
@@ -11366,11 +11366,11 @@
0x~~~~~~~~~~~~~~~~ a541f81a ld3w {z26.s, z27.s, z28.s}, p6/z, [x0, #3, mul vl]
# z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000)
# z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000)
-# z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
+# z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
# ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
# z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
# z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
@@ -11401,10 +11401,10 @@
# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ a5c1f41b ld3d {z27.d, z28.d, z29.d}, p5/z, [x0, #3, mul vl]
# z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-# z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000)
-# z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000)
+# z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000)
+# z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000)
# ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -11856,5 +11856,685 @@
# z31<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16)
+# sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555
+# x0: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp]
+# x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp]
+# w0: 0x00000055
+# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp]
+# w1: 0x000000aa
+# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp]
+# w2: 0x000000ff
+# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp]
+# w3: 0x00000054
+# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp]
+# w0: 0x000055a9
+# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp]
+# w1: 0x000055fe
+# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp]
+# w2: 0x0000aba7
+# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp]
+# w3: 0x00000150
+# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp]
+# w0: 0x555556f9 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp]
+# w1: 0x5555aca2 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp]
+# w2: 0xaaab039b <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp]
+# w3: 0x00005a94 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp]
+# x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp]
+# w0: 0x00000095
+# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp]
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp]
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp]
+# w0: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp]
+# w1: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp]
+# w2: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp]
+# w3: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp]
+# w0: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp]
+# w1: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp]
+# w2: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp]
+# w3: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp]
+# x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp]
+# w1: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp]
+# w3: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp]
+# w0: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp]
+# w1: 0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp]
+# w3: 0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp]
+# w0: 0x0003009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp]
+# w1: 0x00034600 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp]
+# w2: 0x0000469d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp]
+# w3: 0x00034600 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp]
+# x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp]
+# w0: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp]
+# w1: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp]
+# w2: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp]
+# w3: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp]
+# w1: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp]
+# w2: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp]
+# w3: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp]
+# w0: 0x0003ff9d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp]
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp]
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp]
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp]
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp]
+# w0: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp]
+# w1: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp]
+# w2: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp]
+# w3: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp]
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp]
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp]
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp]
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp]
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp]
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp]
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp]
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp]
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp]
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp]
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp]
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp]
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp]
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp]
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp]
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp]
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp]
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp]
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp]
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp]
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp]
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp]
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp]
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp]
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp]
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp]
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp]
+# w1: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp]
+# w2: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp]
+# w3: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp]
+# w0: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp]
+# w1: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp]
+# w2: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp]
+# w3: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp]
+# w0: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp]
+# w1: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp]
+# w2: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp]
+# w3: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp]
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp]
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp]
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp]
+# x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp]
+# wzr: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp]
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp]
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16)
+# sp: 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ d65f03c0 ret
# Branch to 0x~~~~~~~~~~~~~~~~.
diff --git a/test/test-trace-reference/log-all-colour b/test/test-trace-reference/log-all-colour
index a844829f..bf5ec20f 100644
--- a/test/test-trace-reference/log-all-colour
+++ b/test/test-trace-reference/log-all-colour
@@ -1420,9 +1420,9 @@
0x~~~~~~~~~~~~~~~~ 9e42d90f scvtf d15, x8, #10
#  d15:  0x0000000000000000
0x~~~~~~~~~~~~~~~~ 5e21d887 scvtf s7, s4
-#  v7: 0x00000000000000007ff000004e81442e
+#  v7: 0x0000000000000000000000004e81442e
0x~~~~~~~~~~~~~~~~ 5f32e5e8 scvtf s8, s15, #14
-#  v8: 0x0000000000000000c004000000000000
+#  v8: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 1e22015d scvtf s29, w10
#  s29:  0x00000000
0x~~~~~~~~~~~~~~~~ 1e02d6af scvtf s15, w21, #11
@@ -1444,9 +1444,9 @@
0x~~~~~~~~~~~~~~~~ 9e4377db ucvtf d27, x30, #35
#  d27:  0x0000000000000000
0x~~~~~~~~~~~~~~~~ 7e21d8ab ucvtf s11, s5
-#  v11: 0x0000000000000000400000004f7fe000
+#  v11: 0x0000000000000000000000004f7fe000
0x~~~~~~~~~~~~~~~~ 7f32e6e0 ucvtf s0, s23, #14
-#  v0: 0x000000000000000043d21c00480a8294
+#  v0: 0x000000000000000000000000480a8294
0x~~~~~~~~~~~~~~~~ 1e230274 ucvtf s20, w19
#  s20:  0x00000000
0x~~~~~~~~~~~~~~~~ 1e03bad5 ucvtf s21, w22, #18
@@ -1456,9 +1456,9 @@
0x~~~~~~~~~~~~~~~~ 9e03ac47 ucvtf s7, x2, #21
#  s7:  0x38ff0000
0x~~~~~~~~~~~~~~~~ 5ee0b813 abs d19, d0
-#  v19: 0x000000000000000043d21c00480a8294
+#  v19: 0x000000000000000000000000480a8294
0x~~~~~~~~~~~~~~~~ 4e20b970 abs v16.16b, v11.16b
-#  v16: 0x0000000000000000400000004f7f2000
+#  v16: 0x0000000000000000000000004f7f2000
0x~~~~~~~~~~~~~~~~ 4ee0bbe0 abs v0.2d, v31.2d
#  v0: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0ea0bb3b abs v27.2s, v25.2s
@@ -1478,7 +1478,7 @@
0x~~~~~~~~~~~~~~~~ 4eee87ea add v10.2d, v31.2d, v14.2d
#  v10: 0x495000018a83940168a6954c14cfd693
0x~~~~~~~~~~~~~~~~ 0eb385cf add v15.2s, v14.2s, v19.2s
-#  v15: 0x00000000000000006328b14b89d7c527
+#  v15: 0x00000000000000001f56954b89d7c527
0x~~~~~~~~~~~~~~~~ 0e7186fb add v27.4h, v23.4h, v17.4h
#  v27: 0x0000000000000000495000000a029400
0x~~~~~~~~~~~~~~~~ 4ebd8799 add v25.4s, v28.4s, v29.4s
@@ -1488,7 +1488,7 @@
0x~~~~~~~~~~~~~~~~ 4e618444 add v4.8h, v2.8h, v1.8h
#  v4: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0eaf41ca addhn v10.2s, v14.2d, v15.2d
-#  v10: 0x000000000000000000000000827f4696
+#  v10: 0x0000000000000000000000003ead2a96
0x~~~~~~~~~~~~~~~~ 0e7a43ca addhn v10.4h, v30.4s, v26.4s
#  v10: 0x00000000000000000000000000003c7f
0x~~~~~~~~~~~~~~~~ 0e36419f addhn v31.8b, v12.8h, v22.8h
@@ -1500,15 +1500,15 @@
0x~~~~~~~~~~~~~~~~ 4e7140ff addhn2 v31.8h, v7.4s, v17.4s
#  v31: 0x000000000000b87f000000000000ffff
0x~~~~~~~~~~~~~~~~ 5ef1ba6e addp d14, v19.2d
-#  v14: 0x000000000000000043d21c00480a8294
+#  v14: 0x000000000000000000000000480a8294
0x~~~~~~~~~~~~~~~~ 4e3cbd03 addp v3.16b, v8.16b, v28.16b
-#  v3: 0x0000000020febf0000000000c4000000
+#  v3: 0x0000000020febf000000000000000000
0x~~~~~~~~~~~~~~~~ 4ef1bca8 addp v8.2d, v5.2d, v17.2d
#  v8: 0x000000007f8000003effffffffe00000
0x~~~~~~~~~~~~~~~~ 0ebabfd6 addp v22.2s, v30.2s, v26.2s
#  v22: 0x00000000000000003c7fffff00000000
0x~~~~~~~~~~~~~~~~ 0e6ebf1d addp v29.4h, v24.4h, v14.4h
-#  v29: 0x00000000000000005fd2ca9e00000000
+#  v29: 0x00000000000000000000ca9e00000000
0x~~~~~~~~~~~~~~~~ 4eb8bf5e addp v30.4s, v26.4s, v24.4s
#  v30: 0x0000000000000000000000003c7fffff
0x~~~~~~~~~~~~~~~~ 0e27bf4c addp v12.8b, v26.8b, v7.8b
@@ -1522,7 +1522,7 @@
0x~~~~~~~~~~~~~~~~ 0e71bbdb addv h27, v30.4h
#  v27: 0x00000000000000000000000000003c7e
0x~~~~~~~~~~~~~~~~ 4e71b9d3 addv h19, v14.8h
-#  v19: 0x00000000000000000000000000002a70
+#  v19: 0x0000000000000000000000000000ca9e
0x~~~~~~~~~~~~~~~~ 4eb1bb6e addv s14, v27.4s
#  v14: 0x00000000000000000000000000003c7e
0x~~~~~~~~~~~~~~~~ 4e3b1d0a and v10.16b, v8.16b, v27.16b
@@ -1530,13 +1530,13 @@
0x~~~~~~~~~~~~~~~~ 0e301c25 and v5.8b, v1.8b, v16.8b
#  v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e781c7a bic v26.16b, v3.16b, v24.16b
-#  v26: 0x0000000020febf0000000000c4000000
+#  v26: 0x0000000020febf000000000000000000
0x~~~~~~~~~~~~~~~~ 2f075487 bic v7.2s, #0xe4, lsl #16
#  v7: 0x000000000000000000000000381b0000
0x~~~~~~~~~~~~~~~~ 2f01b47c bic v28.4h, #0x23, lsl #8
#  v28: 0x000000000000000040dfdcffdcc00000
0x~~~~~~~~~~~~~~~~ 6f05159d bic v29.4s, #0xac, lsl #0
-#  v29: 0x00000000000000005fd2ca1200000000
+#  v29: 0x00000000000000000000ca1200000000
0x~~~~~~~~~~~~~~~~ 0e751fec bic v12.8b, v31.8b, v21.8b
#  v12: 0x0000000000000000000000000000ffff
0x~~~~~~~~~~~~~~~~ 6f049712 bic v18.8h, #0x98, lsl #0
@@ -1546,13 +1546,13 @@
0x~~~~~~~~~~~~~~~~ 2efb1ee2 bif v2.8b, v23.8b, v27.8b
#  v2: 0x0000000000000000495000008a828000
0x~~~~~~~~~~~~~~~~ 6ead1c68 bit v8.16b, v3.16b, v13.16b
-#  v8: 0x000000007f8000003effffffcc000000
+#  v8: 0x000000007f8000003effffffc8000000
0x~~~~~~~~~~~~~~~~ 2eb71ca5 bit v5.8b, v5.8b, v23.8b
#  v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e771fe9 bsl v9.16b, v31.16b, v23.16b
#  v9: 0x0100000008009801010000000800dc00
0x~~~~~~~~~~~~~~~~ 2e631cee bsl v14.8b, v7.8b, v3.8b
-#  v14: 0x000000000000000000000000c4000000
+#  v14: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e2048bd cls v29.16b, v5.16b
#  v29: 0x07070707070707070707070707070707
0x~~~~~~~~~~~~~~~~ 0ea04815 cls v21.2s, v0.2s
@@ -1564,7 +1564,7 @@
0x~~~~~~~~~~~~~~~~ 0e204893 cls v19.8b, v4.8b
#  v19: 0x00000000000000000707070707070707
0x~~~~~~~~~~~~~~~~ 4e6049cf cls v15.8h, v14.8h
-#  v15: 0x000f000f000f000f000f000f0001000f
+#  v15: 0x000f000f000f000f000f000f000f000f
0x~~~~~~~~~~~~~~~~ 6e204881 clz v1.16b, v4.16b
#  v1: 0x08080808080808080808080808080808
0x~~~~~~~~~~~~~~~~ 2ea04a3b clz v27.2s, v17.2s
@@ -1572,17 +1572,17 @@
0x~~~~~~~~~~~~~~~~ 2e604929 clz v9.4h, v9.4h
#  v9: 0x00000000000000000007001000040000
0x~~~~~~~~~~~~~~~~ 6ea049ff clz v31.4s, v15.4s
-#  v31: 0x0000000c0000000c0000000c0000000f
+#  v31: 0x0000000c0000000c0000000c0000000c
0x~~~~~~~~~~~~~~~~ 2e204a6e clz v14.8b, v19.8b
#  v14: 0x00000000000000000505050505050505
0x~~~~~~~~~~~~~~~~ 6e604966 clz v6.8h, v11.8h
-#  v6: 0x00100010001000100001001000010000
+#  v6: 0x00100010001000100010001000010000
0x~~~~~~~~~~~~~~~~ 7efd8cb2 cmeq d18, d5, d29
#  v18: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5ee09bee cmeq d14, d31, #0
#  v14: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e368c73 cmeq v19.16b, v3.16b, v22.16b
-#  v19: 0xffffffff000000ff0000000000ffffff
+#  v19: 0xffffffff000000ff00000000ffffffff
0x~~~~~~~~~~~~~~~~ 4e20992f cmeq v15.16b, v9.16b, #0
#  v15: 0xffffffffffffffffff00ff00ff00ffff
0x~~~~~~~~~~~~~~~~ 6eea8e0c cmeq v12.2d, v16.2d, v10.2d
@@ -1622,15 +1622,15 @@
0x~~~~~~~~~~~~~~~~ 6ee08ae6 cmge v6.2d, v23.2d, #0
#  v6: 0xffffffffffffffffffffffffffffffff
0x~~~~~~~~~~~~~~~~ 0ea33ed9 cmge v25.2s, v22.2s, v3.2s
-#  v25: 0x000000000000000000000000ffffffff
+#  v25: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 2ea08975 cmge v21.2s, v11.2s, #0
#  v21: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 0e6c3c70 cmge v16.4h, v3.4h, v12.4h
-#  v16: 0x0000000000000000ffffffff0000ffff
+#  v16: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 2e608937 cmge v23.4h, v9.4h, #0
#  v23: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 4eab3c47 cmge v7.4s, v2.4s, v11.4s
-#  v7: 0xffffffffffffffff0000000000000000
+#  v7: 0xffffffffffffffffffffffff00000000
0x~~~~~~~~~~~~~~~~ 6ea08ac0 cmge v0.4s, v22.4s, #0
#  v0: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e293fca cmge v10.8b, v30.8b, v9.8b
@@ -1662,11 +1662,11 @@
0x~~~~~~~~~~~~~~~~ 0e608876 cmgt v22.4h, v3.4h, #0
#  v22: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4ebb3565 cmgt v5.4s, v11.4s, v27.4s
-#  v5: 0x0000000000000000ffffffffffffffff
+#  v5: 0x000000000000000000000000ffffffff
0x~~~~~~~~~~~~~~~~ 4ea08a8d cmgt v13.4s, v20.4s, #0
#  v13: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e2737fb cmgt v27.8b, v31.8b, v7.8b
-#  v27: 0x0000000000000000000000ff000000ff
+#  v27: 0x0000000000000000ffffffff000000ff
0x~~~~~~~~~~~~~~~~ 0e208805 cmgt v5.8b, v0.8b, #0
#  v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e6d3796 cmgt v22.8h, v28.8h, v13.8h
@@ -1688,7 +1688,7 @@
0x~~~~~~~~~~~~~~~~ 2e3c3707 cmhi v7.8b, v24.8b, v28.8b
#  v7: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e79354b cmhi v11.8h, v10.8h, v25.8h
-#  v11: 0x0000000000000000ffffffff00000000
+#  v11: 0x0000000000000000ffffffffffff0000
0x~~~~~~~~~~~~~~~~ 7ef13d81 cmhs d1, d12, d17
#  v1: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e3e3f35 cmhs v21.16b, v25.16b, v30.16b
@@ -1730,11 +1730,11 @@
0x~~~~~~~~~~~~~~~~ 0ea0ab99 cmlt v25.2s, v28.2s, #0
#  v25: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e60a960 cmlt v0.4h, v11.4h, #0
-#  v0: 0x0000000000000000ffffffff00000000
+#  v0: 0x0000000000000000ffffffffffff0000
0x~~~~~~~~~~~~~~~~ 4ea0a8b8 cmlt v24.4s, v5.4s, #0
#  v24: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e20a97a cmlt v26.8b, v11.8b, #0
-#  v26: 0x0000000000000000ffffffff00000000
+#  v26: 0x0000000000000000ffffffffffff0000
0x~~~~~~~~~~~~~~~~ 4e60aaa1 cmlt v1.8h, v21.8h, #0
#  v1: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 5efe8efc cmtst d28, d23, d30
@@ -9708,7 +9708,7 @@
0x~~~~~~~~~~~~~~~~ 0e61682b fcvtn v11.2s, v1.2d
#  v11: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e216848 fcvtn v8.4h, v2.4s
-#  v8: 0x37a00000000000000000000000000000
+#  v8: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e616bb8 fcvtn2 v24.4s, v29.2d
#  v24: 0x0000000000000000377f0000377f0000
0x~~~~~~~~~~~~~~~~ 4e216944 fcvtn2 v4.8h, v10.4s
@@ -9766,41 +9766,41 @@
0x~~~~~~~~~~~~~~~~ 6f2efed3 fcvtzu v19.4s, v22.4s, #18
#  v19: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e6ffd0f fdiv v15.2d, v8.2d, v15.2d
-#  v15: 0x7ff00000000000007ff8000000000000
+#  v15: 0x7ff80000000000007ff8000000000000
0x~~~~~~~~~~~~~~~~ 2e3afd2c fdiv v12.2s, v9.2s, v26.2s
#  v12: 0x00000000000000007fc000007fc00000
0x~~~~~~~~~~~~~~~~ 6e33fed3 fdiv v19.4s, v22.4s, v19.4s
#  v19: 0xffffffffffffffffffffffffffffffff
0x~~~~~~~~~~~~~~~~ 4e68f4f3 fmax v19.2d, v7.2d, v8.2d
-#  v19: 0x37a00000000000000000000000000000
+#  v19: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 0e3df599 fmax v25.2s, v12.2s, v29.2s
#  v25: 0x00000000000000007fc000007fc00000
0x~~~~~~~~~~~~~~~~ 4e25f5e6 fmax v6.4s, v15.4s, v5.4s
-#  v6: 0x7ff00000000000007ff8000000000000
+#  v6: 0x7ff80000000000007ff8000000000000
0x~~~~~~~~~~~~~~~~ 4e74c510 fmaxnm v16.2d, v8.2d, v20.2d
-#  v16: 0x37a0000000000000000000000180fe00
+#  v16: 0x0000000000000000000000000180fe00
0x~~~~~~~~~~~~~~~~ 0e39c74f fmaxnm v15.2s, v26.2s, v25.2s
#  v15: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4e30c5d7 fmaxnm v23.4s, v14.4s, v16.4s
-#  v23: 0x37a0000000000000000000000180fe00
+#  v23: 0x0000000000000000000000000180fe00
0x~~~~~~~~~~~~~~~~ 7e70ca66 fmaxnmp d6, v19.2d
-#  v6: 0x000000000000000037a0000000000000
+#  v6: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 7e30cb5b fmaxnmp s27, v26.2s
#  v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e77c588 fmaxnmp v8.2d, v12.2d, v23.2d
-#  v8: 0x37a00000000000007fc000007fc00000
+#  v8: 0x000000000180fe007fc000007fc00000
0x~~~~~~~~~~~~~~~~ 2e36c72d fmaxnmp v13.2s, v25.2s, v22.2s
#  v13: 0x0000000000000000ffffffff7fc00000
0x~~~~~~~~~~~~~~~~ 6e31c56f fmaxnmp v15.4s, v11.4s, v17.4s
#  v15: 0xffffffff000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e30ca7b fmaxnmv s27, v19.4s
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 7e70f9d4 fmaxp d20, v14.2d
#  v20: 0x0000000000000000ffffffff00000000
0x~~~~~~~~~~~~~~~~ 7e30f852 fmaxp s18, v2.2s
#  v18: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e7ff6e9 fmaxp v9.2d, v23.2d, v31.2d
-#  v9: 0x00000000ffffffff37a0000000000000
+#  v9: 0x00000000ffffffff000000000180fe00
0x~~~~~~~~~~~~~~~~ 2e3ff6c7 fmaxp v7.2s, v22.2s, v31.2s
#  v7: 0x0000000000000000ffffffffffffffff
0x~~~~~~~~~~~~~~~~ 6e3df4f2 fmaxp v18.4s, v7.4s, v29.4s
@@ -9843,7 +9843,7 @@
#  v0: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s
#  v25: 0x0000000000000000000000007fc00000
-0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1]
#  v23: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0]
#  v23: 0x00000000000000000000000000000000
@@ -9858,9 +9858,9 @@
0x~~~~~~~~~~~~~~~~ 4e2bcd70 fmla v16.4s, v11.4s, v11.4s
#  v16: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4f891afb fmla v27.4s, v23.4s, v9.s[2]
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5fc653db fmls d27, d30, v6.d[0]
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5f825215 fmls s21, s16, v2.s[0]
#  v21: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4ef5ce65 fmls v5.2d, v19.2d, v21.2d
@@ -9872,7 +9872,7 @@
0x~~~~~~~~~~~~~~~~ 0fab5243 fmls v3.2s, v18.2s, v11.s[1]
#  v3: 0x0000000000000000000000007fffffff
0x~~~~~~~~~~~~~~~~ 4ebeccbb fmls v27.4s, v5.4s, v30.4s
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4fa45a9a fmls v26.4s, v20.4s, v4.s[3]
#  v26: 0x00000000000000007fffffff00000000
0x~~~~~~~~~~~~~~~~ 6f06f6ce fmov v14.2d, #0xd6 (-0.3438)
@@ -9885,13 +9885,13 @@
#  v28: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1]
#  x18: 0x0000000000000000
-0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1]
#  v12: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3]
#  v30: 0x000000000000000000000000ffffffff
0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d
#  v25: 0x00000000000000000000000000000000
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2]
+0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1]
#  v10: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s
#  v7: 0x00000000000000000000000000000000
@@ -9901,7 +9901,7 @@
#  v5: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0]
#  v11: 0x7fc000007fc000007fc000007fffffff
-0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2]
+0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1]
#  v28: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1]
#  v25: 0x00000000000000000000000000000000
@@ -9928,7 +9928,7 @@
0x~~~~~~~~~~~~~~~~ 0ea1daca frecpe v10.2s, v22.2s
#  v10: 0x00000000000000007f8000007f800000
0x~~~~~~~~~~~~~~~~ 4ea1d8c5 frecpe v5.4s, v6.4s
-#  v5: 0x7f8000007f800000474c80007f800000
+#  v5: 0x7f8000007f8000007f8000007f800000
0x~~~~~~~~~~~~~~~~ 4e7afcf6 frecps v22.2d, v7.2d, v26.2d
#  v22: 0x40000000000000004000000000000000
0x~~~~~~~~~~~~~~~~ 0e22ff7f frecps v31.2s, v27.2s, v2.2s
@@ -10020,7 +10020,7 @@
0x~~~~~~~~~~~~~~~~ 2e21d88b ucvtf v11.2s, v4.2s
#  v11: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 2f27e47d ucvtf v29.2s, v3.2s, #25
-#  v29: 0x7fc000007fc000000000000000000000
+#  v29: 0x00000000000000000000000000000000
0x~~~~~~~~~~~~~~~~ 6e21daf6 ucvtf v22.4s, v23.4s
#  v22: 0x4effe000000000004e001a4000000000
0x~~~~~~~~~~~~~~~~ 6f27e532 ucvtf v18.4s, v9.4s, #25
@@ -11173,10 +11173,10 @@
0x~~~~~~~~~~~~~~~~ e551ec06 st3w {z6.s, z7.s, z8.s}, p3, [x0, #3, mul vl]
#  z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
#  z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
-#  z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000)
+#  z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000)
# ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
#  z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
#  z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
#  z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
@@ -11198,10 +11198,10 @@
# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ e5c27007 st3d {z7.d, z8.d, z9.d}, p4, [x0, x2, lsl #3]
#  z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-#  z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000)
+#  z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000)
#  z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
#  z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -11223,9 +11223,9 @@
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ a441f418 ld3b {z24.b, z25.b, z26.b}, p5/z, [x0, #3, mul vl]
-#  z24<127:0>: 0x0000000000000000000000ff00000000
-#  z25<127:0>: 0xa000000000000000000000ffc0000000
-#  z26<127:0>: 0x3700000000000000000000007f000000
+#  z24<127:0>: 0x00000000fe000000000000ff00000000
+#  z25<127:0>: 0x0000000080000000000000ffc0000000
+#  z26<127:0>: 0x0000000001000000000000007f000000
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
@@ -11236,10 +11236,10 @@
# ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
+# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~
+# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
#  z24<255:128>: 0x00000000000000000000000000000000
#  z25<255:128>: 0x00000000000000000000000000000000
#  z26<255:128>: 0x00000000000000000000000000000000
@@ -11366,11 +11366,11 @@
0x~~~~~~~~~~~~~~~~ a541f81a ld3w {z26.s, z27.s, z28.s}, p6/z, [x0, #3, mul vl]
#  z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000)
#  z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000)
-#  z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
+#  z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
# ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
#  z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
#  z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
#  z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
@@ -11401,10 +11401,10 @@
# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ a5c1f41b ld3d {z27.d, z28.d, z29.d}, p5/z, [x0, #3, mul vl]
#  z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-#  z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000)
-#  z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000)
+#  z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000)
+#  z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000)
# ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
#  z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -11856,5 +11856,685 @@
#  z31<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16)
+#  sp: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555
+#  x0: 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp]
+#  x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp]
+#  w0:  0x00000055
+# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp]
+#  w1:  0x000000aa
+# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp]
+#  w2:  0x000000ff
+# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp]
+#  w3:  0x00000054
+# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp]
+#  w0:  0x000055a9
+# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp]
+#  w1:  0x000055fe
+# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp]
+#  w2:  0x0000aba7
+# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp]
+#  w3:  0x00000150
+# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp]
+#  w0:  0x555556f9 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp]
+#  w1:  0x5555aca2 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp]
+#  w2:  0xaaab039b <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp]
+#  w3:  0x00005a94 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp]
+#  x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x8d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x8d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0xb18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0xb18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp]
+#  w0:  0x00000095
+# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp]
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp]
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp]
+#  w0:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp]
+#  w1:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp]
+#  w2:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp]
+#  w3:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp]
+#  w0:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp]
+#  w1:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp]
+#  w2:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp]
+#  w3:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp]
+#  x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp]
+#  w1:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp]
+#  w3:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp]
+#  w0:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp]
+#  w1:  0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp]
+#  w3:  0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp]
+#  w0:  0x0003009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp]
+#  w1:  0x00034600 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp]
+#  w2:  0x0000469d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp]
+#  w3:  0x00034600 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp]
+#  x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp]
+#  w0:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp]
+#  w1:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp]
+#  w2:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp]
+#  w3:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp]
+#  w1:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp]
+#  w2:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp]
+#  w3:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp]
+#  w0:  0x0003ff9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp]
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp]
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp]
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp]
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp]
+#  w0:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp]
+#  w1:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp]
+#  w2:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp]
+#  w3:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp]
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp]
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp]
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp]
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp]
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp]
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp]
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp]
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp]
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp]
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp]
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp]
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp]
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp]
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp]
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp]
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp]
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp]
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp]
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp]
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp]
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp]
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp]
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp]
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp]
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp]
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp]
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp]
+#  w1:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp]
+#  w2:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp]
+#  w3:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp]
+#  w0:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp]
+#  w1:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp]
+#  w2:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp]
+#  w3:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp]
+#  w0:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp]
+#  w1:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp]
+#  w2:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp]
+#  w3:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp]
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp]
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp]
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp]
+#  x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x00 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x00 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp]
+#  wzr:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp]
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp]
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16)
+#  sp: 0x~~~~~~~~~~~~~~~~
0x~~~~~~~~~~~~~~~~ d65f03c0 ret
# Branch to 0x~~~~~~~~~~~~~~~~.
diff --git a/test/test-trace-reference/log-branch b/test/test-trace-reference/log-branch
index fff3143f..0491d505 100644
--- a/test/test-trace-reference/log-branch
+++ b/test/test-trace-reference/log-branch
@@ -2934,10 +2934,10 @@
# ╙───────────────────────────── 0x0000'0000'0000 -> 0x~~~~~~~~~~~~~~~~
# z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
# z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
-# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000)
+# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000)
# ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
# z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
# z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
@@ -2958,10 +2958,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000)
+# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000)
# z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -3188,3 +3188,196 @@
# z5<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-branch-colour b/test/test-trace-reference/log-branch-colour
index 92faa1fd..7caf1a45 100644
--- a/test/test-trace-reference/log-branch-colour
+++ b/test/test-trace-reference/log-branch-colour
@@ -2934,10 +2934,10 @@
# ╙───────────────────────────── 0x0000'0000'0000 -> 0x~~~~~~~~~~~~~~~~
#  z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
#  z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
-#  z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000)
+#  z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000)
# ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
#  z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
#  z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
#  z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
@@ -2958,10 +2958,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
#  z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-#  z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000)
+#  z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000)
#  z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
#  z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -3188,3 +3188,196 @@
#  z5<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x8d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x8d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0xb18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0xb18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x00 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x00 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-cpufeatures b/test/test-trace-reference/log-cpufeatures
index 804c06f5..795d3580 100644
--- a/test/test-trace-reference/log-cpufeatures
+++ b/test/test-trace-reference/log-cpufeatures
@@ -2292,7 +2292,7 @@
0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s // Needs: FP, NEON
-0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] // Needs: FP, NEON
+0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] // Needs: FP, NEON
@@ -2313,15 +2313,15 @@
0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] // Needs: FP, NEON
-0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] // Needs: FP, NEON
+0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d // Needs: FP, NEON
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] // Needs: FP, NEON
+0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] // Needs: FP, NEON
-0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] // Needs: FP, NEON
+0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d // Needs: FP, NEON
0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] // Needs: FP, NEON
@@ -2449,3 +2449,199 @@
0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] // Needs: SVE
0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] // Needs: SVE
0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] // Needs: SVE
+0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16)
+0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555
+0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] // Needs: Atomics
+0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16)
diff --git a/test/test-trace-reference/log-cpufeatures-colour b/test/test-trace-reference/log-cpufeatures-colour
index 58f04790..170f34d5 100644
--- a/test/test-trace-reference/log-cpufeatures-colour
+++ b/test/test-trace-reference/log-cpufeatures-colour
@@ -2292,7 +2292,7 @@
0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s FP, NEON
0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s FP, NEON
0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s FP, NEON
-0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] FP, NEON
+0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] FP, NEON
0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] FP, NEON
0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d FP, NEON
0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] FP, NEON
@@ -2313,15 +2313,15 @@
0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) FP, NEON
0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 FP, NEON
0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] FP, NEON
-0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] FP, NEON
+0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] FP, NEON
0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] FP, NEON
0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d FP, NEON
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] FP, NEON
+0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] FP, NEON
0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s FP, NEON
0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2] FP, NEON
0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s FP, NEON
0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] FP, NEON
-0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] FP, NEON
+0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] FP, NEON
0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] FP, NEON
0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d FP, NEON
0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] FP, NEON
@@ -2449,3 +2449,199 @@
0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] SVE
0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] SVE
0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] SVE
+0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16)
+0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555
+0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] Atomics
+0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16)
diff --git a/test/test-trace-reference/log-cpufeatures-custom b/test/test-trace-reference/log-cpufeatures-custom
index 1e572f28..3975ec9d 100644
--- a/test/test-trace-reference/log-cpufeatures-custom
+++ b/test/test-trace-reference/log-cpufeatures-custom
@@ -2292,7 +2292,7 @@
0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s ### {FP, NEON} ###
-0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2] ### {FP, NEON} ###
+0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0] ### {FP, NEON} ###
@@ -2313,15 +2313,15 @@
0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000) ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25 ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1] ### {FP, NEON} ###
-0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2] ### {FP, NEON} ###
+0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d ### {FP, NEON} ###
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2] ### {FP, NEON} ###
+0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0] ### {FP, NEON} ###
-0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2] ### {FP, NEON} ###
+0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1] ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d ### {FP, NEON} ###
0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0] ### {FP, NEON} ###
@@ -2449,3 +2449,199 @@
0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl] ### {SVE} ###
0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2] ### {SVE} ###
0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl] ### {SVE} ###
+0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16)
+0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555
+0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp] ### {Atomics} ###
+0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16)
diff --git a/test/test-trace-reference/log-disasm b/test/test-trace-reference/log-disasm
index 583328a3..53f8f010 100644
--- a/test/test-trace-reference/log-disasm
+++ b/test/test-trace-reference/log-disasm
@@ -2293,7 +2293,7 @@
0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s
0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s
0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s
-0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1]
0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0]
0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d
0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0]
@@ -2314,15 +2314,15 @@
0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000)
0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25
0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1]
-0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1]
0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3]
0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2]
+0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1]
0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s
0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2]
0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s
0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0]
-0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2]
+0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1]
0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1]
0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d
0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0]
@@ -2450,4 +2450,200 @@
0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl]
0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2]
0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl]
+0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16)
+0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555
+0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16)
0x~~~~~~~~~~~~~~~~ d65f03c0 ret
diff --git a/test/test-trace-reference/log-disasm-colour b/test/test-trace-reference/log-disasm-colour
index 583328a3..53f8f010 100644
--- a/test/test-trace-reference/log-disasm-colour
+++ b/test/test-trace-reference/log-disasm-colour
@@ -2293,7 +2293,7 @@
0x~~~~~~~~~~~~~~~~ 2ea9f6bd fminp v29.2s, v21.2s, v9.2s
0x~~~~~~~~~~~~~~~~ 6eb5f700 fminp v0.4s, v24.4s, v21.4s
0x~~~~~~~~~~~~~~~~ 6eb0f919 fminv s25, v8.4s
-0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc91817 fmla d23, d0, v9.d[1]
0x~~~~~~~~~~~~~~~~ 5f8711f7 fmla s23, s15, v7.s[0]
0x~~~~~~~~~~~~~~~~ 4e66cd71 fmla v17.2d, v11.2d, v6.2d
0x~~~~~~~~~~~~~~~~ 4fcb13de fmla v30.2d, v30.2d, v11.d[0]
@@ -2314,15 +2314,15 @@
0x~~~~~~~~~~~~~~~~ 4f04f69f fmov v31.4s, #0x94 (-5.0000)
0x~~~~~~~~~~~~~~~~ 9eaf033c fmov v28.D[1], x25
0x~~~~~~~~~~~~~~~~ 9eae0052 fmov x18, v2.D[1]
-0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[2]
+0x~~~~~~~~~~~~~~~~ 5fc1988c fmul d12, d4, v1.d[1]
0x~~~~~~~~~~~~~~~~ 5faf983e fmul s30, s1, v15.s[3]
0x~~~~~~~~~~~~~~~~ 6e75dc19 fmul v25.2d, v0.2d, v21.2d
-0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[2]
+0x~~~~~~~~~~~~~~~~ 4fca9b0a fmul v10.2d, v24.2d, v10.d[1]
0x~~~~~~~~~~~~~~~~ 2e30df07 fmul v7.2s, v24.2s, v16.2s
0x~~~~~~~~~~~~~~~~ 0f849a01 fmul v1.2s, v16.2s, v4.s[2]
0x~~~~~~~~~~~~~~~~ 6e39df85 fmul v5.4s, v28.4s, v25.4s
0x~~~~~~~~~~~~~~~~ 4f88906b fmul v11.4s, v3.4s, v8.s[0]
-0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[2]
+0x~~~~~~~~~~~~~~~~ 7fc3993c fmulx d28, d9, v3.d[1]
0x~~~~~~~~~~~~~~~~ 7faf92b9 fmulx s25, s21, v15.s[1]
0x~~~~~~~~~~~~~~~~ 4e68df9f fmulx v31.2d, v28.2d, v8.2d
0x~~~~~~~~~~~~~~~~ 6fc692a3 fmulx v3.2d, v21.2d, v6.d[0]
@@ -2450,4 +2450,200 @@
0x~~~~~~~~~~~~~~~~ a4e1f81a ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x0, #4, mul vl]
0x~~~~~~~~~~~~~~~~ a562d81b ld4w {z27.s, z28.s, z29.s, z30.s}, p6/z, [x0, x2, lsl #2]
0x~~~~~~~~~~~~~~~~ a5e1f41c ld4d {z28.d, z29.d, z30.d, z31.d}, p5/z, [x0, #4, mul vl]
+0x~~~~~~~~~~~~~~~~ d10043ff sub sp, sp, #0x10 (16)
+0x~~~~~~~~~~~~~~~~ b200f3e0 mov x0, #0x5555555555555555
+0x~~~~~~~~~~~~~~~~ f90003e0 str x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382003e0 ldaddb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a003e1 ldaddab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386003e2 ldaddlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e003e3 ldaddalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782003e0 ldaddh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a003e1 ldaddah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786003e2 ldaddlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e003e3 ldaddalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82003e0 ldadd w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a003e1 ldadda w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86003e2 ldaddl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e003e3 ldaddal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82003e0 ldadd x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a003e1 ldadda x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86003e2 ldaddl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e003e3 ldaddal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382003ff staddb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386003ff staddlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782003ff staddh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786003ff staddlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82003ff stadd w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86003ff staddl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82003ff stadd x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86003ff staddl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382033e0 ldsetb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a033e1 ldsetab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386033e2 ldsetlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e033e3 ldsetalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782033e0 ldseth w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a033e1 ldsetah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786033e2 ldsetlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e033e3 ldsetalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82033e0 ldset w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a033e1 ldseta w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86033e2 ldsetl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e033e3 ldsetal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82033e0 ldset x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a033e1 ldseta x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86033e2 ldsetl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e033e3 ldsetal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382033ff stsetb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386033ff stsetlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782033ff stseth w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786033ff stsetlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82033ff stset w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86033ff stsetl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82033ff stset x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86033ff stsetl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382023e0 ldeorb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a023e1 ldeorab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386023e2 ldeorlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e023e3 ldeoralb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782023e0 ldeorh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a023e1 ldeorah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786023e2 ldeorlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e023e3 ldeoralh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82023e0 ldeor w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a023e1 ldeora w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86023e2 ldeorl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e023e3 ldeoral w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82023e0 ldeor x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a023e1 ldeora x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86023e2 ldeorl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e023e3 ldeoral x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382023ff steorb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386023ff steorlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782023ff steorh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786023ff steorlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82023ff steor w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86023ff steorl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82023ff steor x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86023ff steorl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382053e0 ldsminb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a053e1 ldsminab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386053e2 ldsminlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e053e3 ldsminalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782053e0 ldsminh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a053e1 ldsminah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786053e2 ldsminlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e053e3 ldsminalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82053e0 ldsmin w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a053e1 ldsmina w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86053e2 ldsminl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e053e3 ldsminal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82053e0 ldsmin x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a053e1 ldsmina x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86053e2 ldsminl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e053e3 ldsminal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382053ff stsminb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386053ff stsminlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782053ff stsminh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786053ff stsminlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82053ff stsmin w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86053ff stsminl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82053ff stsmin x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86053ff stsminl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382043e0 ldsmaxb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a043e1 ldsmaxab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386043e2 ldsmaxlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e043e3 ldsmaxalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782043e0 ldsmaxh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a043e1 ldsmaxah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786043e2 ldsmaxlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e043e3 ldsmaxalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82043e0 ldsmax w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a043e1 ldsmaxa w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86043e2 ldsmaxl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e043e3 ldsmaxal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82043e0 ldsmax x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a043e1 ldsmaxa x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86043e2 ldsmaxl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e043e3 ldsmaxal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382043ff stsmaxb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386043ff stsmaxlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782043ff stsmaxh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786043ff stsmaxlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82043ff stsmax w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86043ff stsmaxl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82043ff stsmax x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86043ff stsmaxl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382073e0 lduminb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a073e1 lduminab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386073e2 lduminlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e073e3 lduminalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782073e0 lduminh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a073e1 lduminah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786073e2 lduminlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e073e3 lduminalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82073e0 ldumin w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a073e1 ldumina w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86073e2 lduminl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e073e3 lduminal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82073e0 ldumin x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a073e1 ldumina x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86073e2 lduminl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e073e3 lduminal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382073ff stuminb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386073ff stuminlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782073ff stuminh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786073ff stuminlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82073ff stumin w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86073ff stuminl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82073ff stumin x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86073ff stuminl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382063e0 ldumaxb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a063e1 ldumaxab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386063e2 ldumaxlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e063e3 ldumaxalb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782063e0 ldumaxh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a063e1 ldumaxah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786063e2 ldumaxlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e063e3 ldumaxalh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82063e0 ldumax w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a063e1 ldumaxa w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86063e2 ldumaxl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e063e3 ldumaxal w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82063e0 ldumax x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a063e1 ldumaxa x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86063e2 ldumaxl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e063e3 ldumaxal x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382063ff stumaxb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386063ff stumaxlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782063ff stumaxh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786063ff stumaxlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82063ff stumax w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86063ff stumaxl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82063ff stumax x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86063ff stumaxl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 382013e0 ldclrb w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 38a013e1 ldclrab w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 386013e2 ldclrlb w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 38e013e3 ldclralb w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ 782013e0 ldclrh w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ 78a013e1 ldclrah w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ 786013e2 ldclrlh w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ 78e013e3 ldclralh w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ b82013e0 ldclr w0, w0, [sp]
+0x~~~~~~~~~~~~~~~~ b8a013e1 ldclra w0, w1, [sp]
+0x~~~~~~~~~~~~~~~~ b86013e2 ldclrl w0, w2, [sp]
+0x~~~~~~~~~~~~~~~~ b8e013e3 ldclral w0, w3, [sp]
+0x~~~~~~~~~~~~~~~~ f82013e0 ldclr x0, x0, [sp]
+0x~~~~~~~~~~~~~~~~ f8a013e1 ldclra x0, x1, [sp]
+0x~~~~~~~~~~~~~~~~ f86013e2 ldclrl x0, x2, [sp]
+0x~~~~~~~~~~~~~~~~ f8e013e3 ldclral x0, x3, [sp]
+0x~~~~~~~~~~~~~~~~ 382013ff stclrb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 386013ff stclrlb w0, [sp]
+0x~~~~~~~~~~~~~~~~ 782013ff stclrh w0, [sp]
+0x~~~~~~~~~~~~~~~~ 786013ff stclrlh w0, [sp]
+0x~~~~~~~~~~~~~~~~ b82013ff stclr w0, [sp]
+0x~~~~~~~~~~~~~~~~ b86013ff stclrl w0, [sp]
+0x~~~~~~~~~~~~~~~~ f82013ff stclr x0, [sp]
+0x~~~~~~~~~~~~~~~~ f86013ff stclrl x0, [sp]
+0x~~~~~~~~~~~~~~~~ 910043ff add sp, sp, #0x10 (16)
0x~~~~~~~~~~~~~~~~ d65f03c0 ret
diff --git a/test/test-trace-reference/log-regs b/test/test-trace-reference/log-regs
index 0894e3d3..c350f523 100644
--- a/test/test-trace-reference/log-regs
+++ b/test/test-trace-reference/log-regs
@@ -792,3 +792,294 @@
# x1: 0x~~~~~~~~~~~~~~~~
# lr: 0x0000000000000000
# x18: 0x0000000000000000
+# sp: 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000055
+# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x000000aa
+# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x000000ff
+# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000054
+# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x000055a9
+# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x000055fe
+# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000aba7
+# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000150
+# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x555556f9 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x5555aca2 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xaaab039b <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00005a94 <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000095
+# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0003009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00034600 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000469d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00034600 <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0003ff9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# sp: 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-regs-colour b/test/test-trace-reference/log-regs-colour
index 0df9f2e5..55839980 100644
--- a/test/test-trace-reference/log-regs-colour
+++ b/test/test-trace-reference/log-regs-colour
@@ -792,3 +792,294 @@
#  x1: 0x~~~~~~~~~~~~~~~~
#  lr: 0x0000000000000000
#  x18: 0x0000000000000000
+#  sp: 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000055
+# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x000000aa
+# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x000000ff
+# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000054
+# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x000055a9
+# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x000055fe
+# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000aba7
+# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000150
+# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x555556f9 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x5555aca2 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xaaab039b <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00005a94 <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000095
+# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0003009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00034600 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000469d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00034600 <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0003ff9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  sp: 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-state b/test/test-trace-reference/log-state
index 8113a01d..0b5069dc 100644
--- a/test/test-trace-reference/log-state
+++ b/test/test-trace-reference/log-state
@@ -861,8 +861,8 @@
# d5: 0x3f90000000000000
# d16: 0x0000000000000000
# d15: 0x0000000000000000
-# v7: 0x00000000000000007ff000004e81442e
-# v8: 0x0000000000000000c004000000000000
+# v7: 0x0000000000000000000000004e81442e
+# v8: 0x00000000000000000000000000000000
# s29: 0x00000000
# s15: 0x49800000
# s27: 0x00000000
@@ -873,14 +873,14 @@
# d3: 0x0000000000000000
# d28: 0x41dfffffffc00000
# d27: 0x0000000000000000
-# v11: 0x0000000000000000400000004f7fe000
-# v0: 0x000000000000000043d21c00480a8294
+# v11: 0x0000000000000000000000004f7fe000
+# v0: 0x000000000000000000000000480a8294
# s20: 0x00000000
# s21: 0x00000000
# s6: 0x5f000000
# s7: 0x38ff0000
-# v19: 0x000000000000000043d21c00480a8294
-# v16: 0x0000000000000000400000004f7f2000
+# v19: 0x000000000000000000000000480a8294
+# v16: 0x0000000000000000000000004f7f2000
# v0: 0x00000000000000000000000000000000
# v27: 0x00000000000000000000000000000000
# v21: 0x00000000000000000000000000000000
@@ -890,59 +890,59 @@
# v10: 0x00000000000000003f0000007f600000
# v31: 0x495000018a83940149500000d3029400
# v10: 0x495000018a83940168a6954c14cfd693
-# v15: 0x00000000000000006328b14b89d7c527
+# v15: 0x00000000000000001f56954b89d7c527
# v27: 0x0000000000000000495000000a029400
# v25: 0x000000000000000041efffffffc00000
# v13: 0x00000000000000000000000037feffff
# v4: 0x00000000000000000000000000000000
-# v10: 0x000000000000000000000000827f4696
+# v10: 0x0000000000000000000000003ead2a96
# v10: 0x00000000000000000000000000003c7f
# v31: 0x0000000000000000000000000000ffff
# v16: 0x00000000000000000000000000000000
# v0: 0x00000000000000000000000000000000
# v31: 0x000000000000b87f000000000000ffff
-# v14: 0x000000000000000043d21c00480a8294
-# v3: 0x0000000020febf0000000000c4000000
+# v14: 0x000000000000000000000000480a8294
+# v3: 0x0000000020febf000000000000000000
# v8: 0x000000007f8000003effffffffe00000
# v22: 0x00000000000000003c7fffff00000000
-# v29: 0x00000000000000005fd2ca9e00000000
+# v29: 0x00000000000000000000ca9e00000000
# v30: 0x0000000000000000000000003c7fffff
# v12: 0x0000000000000000000037000000bbfe
# v17: 0x000000003700bbfe00007f803efeffe0
# v27: 0x00000000000000000000000000000075
# v12: 0x00000000000000000000000000000000
# v27: 0x00000000000000000000000000003c7e
-# v19: 0x00000000000000000000000000002a70
+# v19: 0x0000000000000000000000000000ca9e
# v14: 0x00000000000000000000000000003c7e
# v10: 0x00000000000000000000000000000000
# v5: 0x00000000000000000000000000000000
-# v26: 0x0000000020febf0000000000c4000000
+# v26: 0x0000000020febf000000000000000000
# v7: 0x000000000000000000000000381b0000
# v28: 0x000000000000000040dfdcffdcc00000
-# v29: 0x00000000000000005fd2ca1200000000
+# v29: 0x00000000000000000000ca1200000000
# v12: 0x0000000000000000000000000000ffff
# v18: 0x000000000000000000000000ff67ff67
# v12: 0x00000000007ebf000000000000000000
# v2: 0x0000000000000000495000008a828000
-# v8: 0x000000007f8000003effffffcc000000
+# v8: 0x000000007f8000003effffffc8000000
# v5: 0x00000000000000000000000000000000
# v9: 0x0100000008009801010000000800dc00
-# v14: 0x000000000000000000000000c4000000
+# v14: 0x00000000000000000000000000000000
# v29: 0x07070707070707070707070707070707
# v21: 0x00000000000000000000001f0000001f
# v1: 0x0000000000000000000f000f000f000f
# v27: 0x0000001f0000001f0000001f0000001f
# v19: 0x00000000000000000707070707070707
-# v15: 0x000f000f000f000f000f000f0001000f
+# v15: 0x000f000f000f000f000f000f000f000f
# v1: 0x08080808080808080808080808080808
# v27: 0x00000000000000000000001100000002
# v9: 0x00000000000000000007001000040000
-# v31: 0x0000000c0000000c0000000c0000000f
+# v31: 0x0000000c0000000c0000000c0000000c
# v14: 0x00000000000000000505050505050505
-# v6: 0x00100010001000100001001000010000
+# v6: 0x00100010001000100010001000010000
# v18: 0x00000000000000000000000000000000
# v14: 0x00000000000000000000000000000000
-# v19: 0xffffffff000000ff0000000000ffffff
+# v19: 0xffffffff000000ff00000000ffffffff
# v15: 0xffffffffffffffffff00ff00ff00ffff
# v12: 0xffffffffffffffffffffffffffffffff
# v8: 0xffffffffffffffff0000000000000000
@@ -962,11 +962,11 @@
# v22: 0xffffffffffffffffffffffffffff0000
# v28: 0x0000000000000000ffffffffffffffff
# v6: 0xffffffffffffffffffffffffffffffff
-# v25: 0x000000000000000000000000ffffffff
+# v25: 0x00000000000000000000000000000000
# v21: 0x0000000000000000ffffffffffffffff
-# v16: 0x0000000000000000ffffffff0000ffff
+# v16: 0x0000000000000000ffffffffffffffff
# v23: 0x0000000000000000ffffffffffffffff
-# v7: 0xffffffffffffffff0000000000000000
+# v7: 0xffffffffffffffffffffffff00000000
# v0: 0x00000000000000000000000000000000
# v10: 0x0000000000000000ff00ff00ffff0000
# v21: 0x0000000000000000ffffffffffffffff
@@ -982,9 +982,9 @@
# v12: 0x00000000000000000000000000000000
# v28: 0x0000000000000000000000000000ffff
# v22: 0x00000000000000000000000000000000
-# v5: 0x0000000000000000ffffffffffffffff
+# v5: 0x000000000000000000000000ffffffff
# v13: 0x00000000000000000000000000000000
-# v27: 0x0000000000000000000000ff000000ff
+# v27: 0x0000000000000000ffffffff000000ff
# v5: 0x00000000000000000000000000000000
# v22: 0x00000000000000000000000000000000
# v6: 0x00000000000000000000000000000000
@@ -995,7 +995,7 @@
# v31: 0x00000000000000000000000000000000
# v9: 0x00000000000000000000000000000000
# v7: 0x00000000000000000000000000000000
-# v11: 0x0000000000000000ffffffff00000000
+# v11: 0x0000000000000000ffffffffffff0000
# v1: 0x00000000000000000000000000000000
# v21: 0xffffffffffffffffffffffffffffffff
# v8: 0xffffffffffffffffffffffffffffffff
@@ -1016,9 +1016,9 @@
# v7: 0x0000000000000000ffffffffffffffff
# v7: 0x0000000000000000ffffffffffffffff
# v25: 0x00000000000000000000000000000000
-# v0: 0x0000000000000000ffffffff00000000
+# v0: 0x0000000000000000ffffffffffff0000
# v24: 0x00000000000000000000000000000000
-# v26: 0x0000000000000000ffffffff00000000
+# v26: 0x0000000000000000ffffffffffff0000
# v1: 0x0000000000000000ffffffffffffffff
# v28: 0x0000000000000000ffffffffffffffff
# v26: 0x00000000000000000000000000000000
@@ -5088,7 +5088,7 @@
# v26: 0x00000000000000000000000000000000
# v21: 0x00000000000000000000000000000000
# v11: 0x00000000000000000000000000000000
-# v8: 0x37a00000000000000000000000000000
+# v8: 0x00000000000000000000000000000000
# v24: 0x0000000000000000377f0000377f0000
# v4: 0x0000000000000000ffffffffffffffff
# v25: 0x00000000000000000000000000000000
@@ -5117,24 +5117,24 @@
# v9: 0x00000000000000000000000000000000
# v30: 0x00000000000000000000000000000000
# v19: 0x00000000000000000000000000000000
-# v15: 0x7ff00000000000007ff8000000000000
+# v15: 0x7ff80000000000007ff8000000000000
# v12: 0x00000000000000007fc000007fc00000
# v19: 0xffffffffffffffffffffffffffffffff
-# v19: 0x37a00000000000000000000000000000
+# v19: 0x00000000000000000000000000000000
# v25: 0x00000000000000007fc000007fc00000
-# v6: 0x7ff00000000000007ff8000000000000
-# v16: 0x37a0000000000000000000000180fe00
+# v6: 0x7ff80000000000007ff8000000000000
+# v16: 0x0000000000000000000000000180fe00
# v15: 0x00000000000000000000000000000000
-# v23: 0x37a0000000000000000000000180fe00
-# v6: 0x000000000000000037a0000000000000
+# v23: 0x0000000000000000000000000180fe00
+# v6: 0x00000000000000000000000000000000
# v27: 0x00000000000000000000000000000000
-# v8: 0x37a00000000000007fc000007fc00000
+# v8: 0x000000000180fe007fc000007fc00000
# v13: 0x0000000000000000ffffffff7fc00000
# v15: 0xffffffff000000000000000000000000
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
# v20: 0x0000000000000000ffffffff00000000
# v18: 0x00000000000000000000000000000000
-# v9: 0x00000000ffffffff37a0000000000000
+# v9: 0x00000000ffffffff000000000180fe00
# v7: 0x0000000000000000ffffffffffffffff
# v18: 0x000000000000000000000000ffffffff
# v31: 0x00000000000000000000000000000000
@@ -5163,14 +5163,14 @@
# v19: 0x00000000000000007fc000007fc00000
# v24: 0x00000000000000000000000000000000
# v16: 0x00000000000000000000000000000000
-# v27: 0x00000000000000000000000037a00000
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
+# v27: 0x00000000000000000000000000000000
# v21: 0x00000000000000000000000000000000
# v5: 0x00000000000000000000000000000000
# v18: 0x000000000000000000000000ffffffff
# v5: 0x00000000000000000000000000000000
# v3: 0x0000000000000000000000007fffffff
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
# v26: 0x00000000000000007fffffff00000000
# v14: 0xbfd6000000000000bfd6000000000000
# v26: 0x00000000000000003f6800003f680000
@@ -5198,7 +5198,7 @@
# v5: 0x80000000800000008000000080000000
# v18: 0x7ff00000000000007ff0000000000000
# v10: 0x00000000000000007f8000007f800000
-# v5: 0x7f8000007f800000474c80007f800000
+# v5: 0x7f8000007f8000007f8000007f800000
# v22: 0x40000000000000004000000000000000
# v31: 0x00000000000000004000000040000000
# v18: 0x40000000400000004000000040000000
@@ -5244,7 +5244,7 @@
# v9: 0x43dfe000001fe0000000000000000000
# v26: 0x000000000000000040fff00000200000
# v11: 0x00000000000000000000000000000000
-# v29: 0x7fc000007fc000000000000000000000
+# v29: 0x00000000000000000000000000000000
# v22: 0x4effe000000000004e001a4000000000
# v18: 0x4207bfc03d7f00000000000000000000
# p8<15:0>: 0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 <- 0x~~~~~~~~~~~~~~~~
@@ -5795,9 +5795,9 @@
# z24<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# z24<127:0>: 0x0000000000000000000000ff00000000
-# z25<127:0>: 0xa000000000000000000000ffc0000000
-# z26<127:0>: 0x3700000000000000000000007f000000
+# z24<127:0>: 0x00000000fe000000000000ff00000000
+# z25<127:0>: 0x0000000080000000000000ffc0000000
+# z26<127:0>: 0x0000000001000000000000007f000000
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
@@ -5808,10 +5808,10 @@
# ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
+# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~
+# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# z24<255:128>: 0x00000000000000000000000000000000
# z25<255:128>: 0x00000000000000000000000000000000
# z26<255:128>: 0x00000000000000000000000000000000
@@ -5936,11 +5936,11 @@
# ╙───────────────────────────── 0xedec'ebea'e9e8 <- 0x~~~~~~~~~~~~~~~~
# z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000)
# z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000)
-# z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
+# z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
# ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
# z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
# z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
@@ -5970,10 +5970,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-# z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000)
-# z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000)
+# z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000)
+# z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000)
# ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -6211,3 +6211,294 @@
# z31<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# sp: 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000055
+# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x000000aa
+# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x000000ff
+# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000054
+# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x000055a9
+# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x000055fe
+# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000aba7
+# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000150
+# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x555556f9 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x5555aca2 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xaaab039b <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00005a94 <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000095
+# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0003469d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0003009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00034600 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000469d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00034600 <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0003ff9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x0000009d <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# w0: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# w1: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# w2: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# w3: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+# x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# wzr: 0x00000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# sp: 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-state-colour b/test/test-trace-reference/log-state-colour
index 0f1b75e4..3234c7a8 100644
--- a/test/test-trace-reference/log-state-colour
+++ b/test/test-trace-reference/log-state-colour
@@ -861,8 +861,8 @@
#  d5:  0x3f90000000000000
#  d16:  0x0000000000000000
#  d15:  0x0000000000000000
-#  v7: 0x00000000000000007ff000004e81442e
-#  v8: 0x0000000000000000c004000000000000
+#  v7: 0x0000000000000000000000004e81442e
+#  v8: 0x00000000000000000000000000000000
#  s29:  0x00000000
#  s15:  0x49800000
#  s27:  0x00000000
@@ -873,14 +873,14 @@
#  d3:  0x0000000000000000
#  d28:  0x41dfffffffc00000
#  d27:  0x0000000000000000
-#  v11: 0x0000000000000000400000004f7fe000
-#  v0: 0x000000000000000043d21c00480a8294
+#  v11: 0x0000000000000000000000004f7fe000
+#  v0: 0x000000000000000000000000480a8294
#  s20:  0x00000000
#  s21:  0x00000000
#  s6:  0x5f000000
#  s7:  0x38ff0000
-#  v19: 0x000000000000000043d21c00480a8294
-#  v16: 0x0000000000000000400000004f7f2000
+#  v19: 0x000000000000000000000000480a8294
+#  v16: 0x0000000000000000000000004f7f2000
#  v0: 0x00000000000000000000000000000000
#  v27: 0x00000000000000000000000000000000
#  v21: 0x00000000000000000000000000000000
@@ -890,59 +890,59 @@
#  v10: 0x00000000000000003f0000007f600000
#  v31: 0x495000018a83940149500000d3029400
#  v10: 0x495000018a83940168a6954c14cfd693
-#  v15: 0x00000000000000006328b14b89d7c527
+#  v15: 0x00000000000000001f56954b89d7c527
#  v27: 0x0000000000000000495000000a029400
#  v25: 0x000000000000000041efffffffc00000
#  v13: 0x00000000000000000000000037feffff
#  v4: 0x00000000000000000000000000000000
-#  v10: 0x000000000000000000000000827f4696
+#  v10: 0x0000000000000000000000003ead2a96
#  v10: 0x00000000000000000000000000003c7f
#  v31: 0x0000000000000000000000000000ffff
#  v16: 0x00000000000000000000000000000000
#  v0: 0x00000000000000000000000000000000
#  v31: 0x000000000000b87f000000000000ffff
-#  v14: 0x000000000000000043d21c00480a8294
-#  v3: 0x0000000020febf0000000000c4000000
+#  v14: 0x000000000000000000000000480a8294
+#  v3: 0x0000000020febf000000000000000000
#  v8: 0x000000007f8000003effffffffe00000
#  v22: 0x00000000000000003c7fffff00000000
-#  v29: 0x00000000000000005fd2ca9e00000000
+#  v29: 0x00000000000000000000ca9e00000000
#  v30: 0x0000000000000000000000003c7fffff
#  v12: 0x0000000000000000000037000000bbfe
#  v17: 0x000000003700bbfe00007f803efeffe0
#  v27: 0x00000000000000000000000000000075
#  v12: 0x00000000000000000000000000000000
#  v27: 0x00000000000000000000000000003c7e
-#  v19: 0x00000000000000000000000000002a70
+#  v19: 0x0000000000000000000000000000ca9e
#  v14: 0x00000000000000000000000000003c7e
#  v10: 0x00000000000000000000000000000000
#  v5: 0x00000000000000000000000000000000
-#  v26: 0x0000000020febf0000000000c4000000
+#  v26: 0x0000000020febf000000000000000000
#  v7: 0x000000000000000000000000381b0000
#  v28: 0x000000000000000040dfdcffdcc00000
-#  v29: 0x00000000000000005fd2ca1200000000
+#  v29: 0x00000000000000000000ca1200000000
#  v12: 0x0000000000000000000000000000ffff
#  v18: 0x000000000000000000000000ff67ff67
#  v12: 0x00000000007ebf000000000000000000
#  v2: 0x0000000000000000495000008a828000
-#  v8: 0x000000007f8000003effffffcc000000
+#  v8: 0x000000007f8000003effffffc8000000
#  v5: 0x00000000000000000000000000000000
#  v9: 0x0100000008009801010000000800dc00
-#  v14: 0x000000000000000000000000c4000000
+#  v14: 0x00000000000000000000000000000000
#  v29: 0x07070707070707070707070707070707
#  v21: 0x00000000000000000000001f0000001f
#  v1: 0x0000000000000000000f000f000f000f
#  v27: 0x0000001f0000001f0000001f0000001f
#  v19: 0x00000000000000000707070707070707
-#  v15: 0x000f000f000f000f000f000f0001000f
+#  v15: 0x000f000f000f000f000f000f000f000f
#  v1: 0x08080808080808080808080808080808
#  v27: 0x00000000000000000000001100000002
#  v9: 0x00000000000000000007001000040000
-#  v31: 0x0000000c0000000c0000000c0000000f
+#  v31: 0x0000000c0000000c0000000c0000000c
#  v14: 0x00000000000000000505050505050505
-#  v6: 0x00100010001000100001001000010000
+#  v6: 0x00100010001000100010001000010000
#  v18: 0x00000000000000000000000000000000
#  v14: 0x00000000000000000000000000000000
-#  v19: 0xffffffff000000ff0000000000ffffff
+#  v19: 0xffffffff000000ff00000000ffffffff
#  v15: 0xffffffffffffffffff00ff00ff00ffff
#  v12: 0xffffffffffffffffffffffffffffffff
#  v8: 0xffffffffffffffff0000000000000000
@@ -962,11 +962,11 @@
#  v22: 0xffffffffffffffffffffffffffff0000
#  v28: 0x0000000000000000ffffffffffffffff
#  v6: 0xffffffffffffffffffffffffffffffff
-#  v25: 0x000000000000000000000000ffffffff
+#  v25: 0x00000000000000000000000000000000
#  v21: 0x0000000000000000ffffffffffffffff
-#  v16: 0x0000000000000000ffffffff0000ffff
+#  v16: 0x0000000000000000ffffffffffffffff
#  v23: 0x0000000000000000ffffffffffffffff
-#  v7: 0xffffffffffffffff0000000000000000
+#  v7: 0xffffffffffffffffffffffff00000000
#  v0: 0x00000000000000000000000000000000
#  v10: 0x0000000000000000ff00ff00ffff0000
#  v21: 0x0000000000000000ffffffffffffffff
@@ -982,9 +982,9 @@
#  v12: 0x00000000000000000000000000000000
#  v28: 0x0000000000000000000000000000ffff
#  v22: 0x00000000000000000000000000000000
-#  v5: 0x0000000000000000ffffffffffffffff
+#  v5: 0x000000000000000000000000ffffffff
#  v13: 0x00000000000000000000000000000000
-#  v27: 0x0000000000000000000000ff000000ff
+#  v27: 0x0000000000000000ffffffff000000ff
#  v5: 0x00000000000000000000000000000000
#  v22: 0x00000000000000000000000000000000
#  v6: 0x00000000000000000000000000000000
@@ -995,7 +995,7 @@
#  v31: 0x00000000000000000000000000000000
#  v9: 0x00000000000000000000000000000000
#  v7: 0x00000000000000000000000000000000
-#  v11: 0x0000000000000000ffffffff00000000
+#  v11: 0x0000000000000000ffffffffffff0000
#  v1: 0x00000000000000000000000000000000
#  v21: 0xffffffffffffffffffffffffffffffff
#  v8: 0xffffffffffffffffffffffffffffffff
@@ -1016,9 +1016,9 @@
#  v7: 0x0000000000000000ffffffffffffffff
#  v7: 0x0000000000000000ffffffffffffffff
#  v25: 0x00000000000000000000000000000000
-#  v0: 0x0000000000000000ffffffff00000000
+#  v0: 0x0000000000000000ffffffffffff0000
#  v24: 0x00000000000000000000000000000000
-#  v26: 0x0000000000000000ffffffff00000000
+#  v26: 0x0000000000000000ffffffffffff0000
#  v1: 0x0000000000000000ffffffffffffffff
#  v28: 0x0000000000000000ffffffffffffffff
#  v26: 0x00000000000000000000000000000000
@@ -5088,7 +5088,7 @@
#  v26: 0x00000000000000000000000000000000
#  v21: 0x00000000000000000000000000000000
#  v11: 0x00000000000000000000000000000000
-#  v8: 0x37a00000000000000000000000000000
+#  v8: 0x00000000000000000000000000000000
#  v24: 0x0000000000000000377f0000377f0000
#  v4: 0x0000000000000000ffffffffffffffff
#  v25: 0x00000000000000000000000000000000
@@ -5117,24 +5117,24 @@
#  v9: 0x00000000000000000000000000000000
#  v30: 0x00000000000000000000000000000000
#  v19: 0x00000000000000000000000000000000
-#  v15: 0x7ff00000000000007ff8000000000000
+#  v15: 0x7ff80000000000007ff8000000000000
#  v12: 0x00000000000000007fc000007fc00000
#  v19: 0xffffffffffffffffffffffffffffffff
-#  v19: 0x37a00000000000000000000000000000
+#  v19: 0x00000000000000000000000000000000
#  v25: 0x00000000000000007fc000007fc00000
-#  v6: 0x7ff00000000000007ff8000000000000
-#  v16: 0x37a0000000000000000000000180fe00
+#  v6: 0x7ff80000000000007ff8000000000000
+#  v16: 0x0000000000000000000000000180fe00
#  v15: 0x00000000000000000000000000000000
-#  v23: 0x37a0000000000000000000000180fe00
-#  v6: 0x000000000000000037a0000000000000
+#  v23: 0x0000000000000000000000000180fe00
+#  v6: 0x00000000000000000000000000000000
#  v27: 0x00000000000000000000000000000000
-#  v8: 0x37a00000000000007fc000007fc00000
+#  v8: 0x000000000180fe007fc000007fc00000
#  v13: 0x0000000000000000ffffffff7fc00000
#  v15: 0xffffffff000000000000000000000000
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
#  v20: 0x0000000000000000ffffffff00000000
#  v18: 0x00000000000000000000000000000000
-#  v9: 0x00000000ffffffff37a0000000000000
+#  v9: 0x00000000ffffffff000000000180fe00
#  v7: 0x0000000000000000ffffffffffffffff
#  v18: 0x000000000000000000000000ffffffff
#  v31: 0x00000000000000000000000000000000
@@ -5163,14 +5163,14 @@
#  v19: 0x00000000000000007fc000007fc00000
#  v24: 0x00000000000000000000000000000000
#  v16: 0x00000000000000000000000000000000
-#  v27: 0x00000000000000000000000037a00000
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
+#  v27: 0x00000000000000000000000000000000
#  v21: 0x00000000000000000000000000000000
#  v5: 0x00000000000000000000000000000000
#  v18: 0x000000000000000000000000ffffffff
#  v5: 0x00000000000000000000000000000000
#  v3: 0x0000000000000000000000007fffffff
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
#  v26: 0x00000000000000007fffffff00000000
#  v14: 0xbfd6000000000000bfd6000000000000
#  v26: 0x00000000000000003f6800003f680000
@@ -5198,7 +5198,7 @@
#  v5: 0x80000000800000008000000080000000
#  v18: 0x7ff00000000000007ff0000000000000
#  v10: 0x00000000000000007f8000007f800000
-#  v5: 0x7f8000007f800000474c80007f800000
+#  v5: 0x7f8000007f8000007f8000007f800000
#  v22: 0x40000000000000004000000000000000
#  v31: 0x00000000000000004000000040000000
#  v18: 0x40000000400000004000000040000000
@@ -5244,7 +5244,7 @@
#  v9: 0x43dfe000001fe0000000000000000000
#  v26: 0x000000000000000040fff00000200000
#  v11: 0x00000000000000000000000000000000
-#  v29: 0x7fc000007fc000000000000000000000
+#  v29: 0x00000000000000000000000000000000
#  v22: 0x4effe000000000004e001a4000000000
#  v18: 0x4207bfc03d7f00000000000000000000
#  p8<15:0>: 0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 <- 0x~~~~~~~~~~~~~~~~
@@ -5795,9 +5795,9 @@
#  z24<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-#  z24<127:0>: 0x0000000000000000000000ff00000000
-#  z25<127:0>: 0xa000000000000000000000ffc0000000
-#  z26<127:0>: 0x3700000000000000000000007f000000
+#  z24<127:0>: 0x00000000fe000000000000ff00000000
+#  z25<127:0>: 0x0000000080000000000000ffc0000000
+#  z26<127:0>: 0x0000000001000000000000007f000000
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
@@ -5808,10 +5808,10 @@
# ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
+# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~
+# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
#  z24<255:128>: 0x00000000000000000000000000000000
#  z25<255:128>: 0x00000000000000000000000000000000
#  z26<255:128>: 0x00000000000000000000000000000000
@@ -5936,11 +5936,11 @@
# ╙───────────────────────────── 0xedec'ebea'e9e8 <- 0x~~~~~~~~~~~~~~~~
#  z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000)
#  z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000)
-#  z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
+#  z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
# ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
#  z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
#  z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
#  z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
@@ -5970,10 +5970,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
#  z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-#  z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000)
-#  z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000)
+#  z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000)
+#  z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000)
# ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
#  z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -6211,3 +6211,294 @@
#  z31<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  sp: 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000055
+# ╙─ 0x55 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x000000aa
+# ╙─ 0xaa <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x000000ff
+# ╙─ 0xff <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000054
+# ╙─ 0x54 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x000055a9
+# ╙─ 0x55a9 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x000055fe
+# ╙─ 0x55fe <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000aba7
+# ╙─ 0xaba7 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000150
+# ╙─ 0x0150 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x555556f9 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x5555aca2 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xaaab039b <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00005a94 <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x0000000055566ba0 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x2d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0xba <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x1d47 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0xced4 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000095
+# ╙─ 0x95 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0003469d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000003469d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000469d
+# ╙─ 0x469d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00004600
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0003009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00034600 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000469d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00034600 <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x0000000000034600 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x4600 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0xffffff9d
+# ╙─ 0xff9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0003ff9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0xffffff9d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x0000009d <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x000000000000009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x009d <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x0000009d
+# ╙─ 0x9d <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  w0:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  w1:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  w2:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  w3:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  x0: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x1: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x2: 0x~~~~~~~~~~~~~~~~ <- 0x~~~~~~~~~~~~~~~~
+#  x3: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x00 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000
+# ╙─ 0x0000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  wzr:  0x00000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  xzr: 0x0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+#  sp: 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-vregs b/test/test-trace-reference/log-vregs
index 60df96af..72a2d0ef 100644
--- a/test/test-trace-reference/log-vregs
+++ b/test/test-trace-reference/log-vregs
@@ -368,8 +368,8 @@
# d5: 0x3f90000000000000
# d16: 0x0000000000000000
# d15: 0x0000000000000000
-# v7: 0x00000000000000007ff000004e81442e
-# v8: 0x0000000000000000c004000000000000
+# v7: 0x0000000000000000000000004e81442e
+# v8: 0x00000000000000000000000000000000
# s29: 0x00000000
# s15: 0x49800000
# s27: 0x00000000
@@ -380,14 +380,14 @@
# d3: 0x0000000000000000
# d28: 0x41dfffffffc00000
# d27: 0x0000000000000000
-# v11: 0x0000000000000000400000004f7fe000
-# v0: 0x000000000000000043d21c00480a8294
+# v11: 0x0000000000000000000000004f7fe000
+# v0: 0x000000000000000000000000480a8294
# s20: 0x00000000
# s21: 0x00000000
# s6: 0x5f000000
# s7: 0x38ff0000
-# v19: 0x000000000000000043d21c00480a8294
-# v16: 0x0000000000000000400000004f7f2000
+# v19: 0x000000000000000000000000480a8294
+# v16: 0x0000000000000000000000004f7f2000
# v0: 0x00000000000000000000000000000000
# v27: 0x00000000000000000000000000000000
# v21: 0x00000000000000000000000000000000
@@ -397,59 +397,59 @@
# v10: 0x00000000000000003f0000007f600000
# v31: 0x495000018a83940149500000d3029400
# v10: 0x495000018a83940168a6954c14cfd693
-# v15: 0x00000000000000006328b14b89d7c527
+# v15: 0x00000000000000001f56954b89d7c527
# v27: 0x0000000000000000495000000a029400
# v25: 0x000000000000000041efffffffc00000
# v13: 0x00000000000000000000000037feffff
# v4: 0x00000000000000000000000000000000
-# v10: 0x000000000000000000000000827f4696
+# v10: 0x0000000000000000000000003ead2a96
# v10: 0x00000000000000000000000000003c7f
# v31: 0x0000000000000000000000000000ffff
# v16: 0x00000000000000000000000000000000
# v0: 0x00000000000000000000000000000000
# v31: 0x000000000000b87f000000000000ffff
-# v14: 0x000000000000000043d21c00480a8294
-# v3: 0x0000000020febf0000000000c4000000
+# v14: 0x000000000000000000000000480a8294
+# v3: 0x0000000020febf000000000000000000
# v8: 0x000000007f8000003effffffffe00000
# v22: 0x00000000000000003c7fffff00000000
-# v29: 0x00000000000000005fd2ca9e00000000
+# v29: 0x00000000000000000000ca9e00000000
# v30: 0x0000000000000000000000003c7fffff
# v12: 0x0000000000000000000037000000bbfe
# v17: 0x000000003700bbfe00007f803efeffe0
# v27: 0x00000000000000000000000000000075
# v12: 0x00000000000000000000000000000000
# v27: 0x00000000000000000000000000003c7e
-# v19: 0x00000000000000000000000000002a70
+# v19: 0x0000000000000000000000000000ca9e
# v14: 0x00000000000000000000000000003c7e
# v10: 0x00000000000000000000000000000000
# v5: 0x00000000000000000000000000000000
-# v26: 0x0000000020febf0000000000c4000000
+# v26: 0x0000000020febf000000000000000000
# v7: 0x000000000000000000000000381b0000
# v28: 0x000000000000000040dfdcffdcc00000
-# v29: 0x00000000000000005fd2ca1200000000
+# v29: 0x00000000000000000000ca1200000000
# v12: 0x0000000000000000000000000000ffff
# v18: 0x000000000000000000000000ff67ff67
# v12: 0x00000000007ebf000000000000000000
# v2: 0x0000000000000000495000008a828000
-# v8: 0x000000007f8000003effffffcc000000
+# v8: 0x000000007f8000003effffffc8000000
# v5: 0x00000000000000000000000000000000
# v9: 0x0100000008009801010000000800dc00
-# v14: 0x000000000000000000000000c4000000
+# v14: 0x00000000000000000000000000000000
# v29: 0x07070707070707070707070707070707
# v21: 0x00000000000000000000001f0000001f
# v1: 0x0000000000000000000f000f000f000f
# v27: 0x0000001f0000001f0000001f0000001f
# v19: 0x00000000000000000707070707070707
-# v15: 0x000f000f000f000f000f000f0001000f
+# v15: 0x000f000f000f000f000f000f000f000f
# v1: 0x08080808080808080808080808080808
# v27: 0x00000000000000000000001100000002
# v9: 0x00000000000000000007001000040000
-# v31: 0x0000000c0000000c0000000c0000000f
+# v31: 0x0000000c0000000c0000000c0000000c
# v14: 0x00000000000000000505050505050505
-# v6: 0x00100010001000100001001000010000
+# v6: 0x00100010001000100010001000010000
# v18: 0x00000000000000000000000000000000
# v14: 0x00000000000000000000000000000000
-# v19: 0xffffffff000000ff0000000000ffffff
+# v19: 0xffffffff000000ff00000000ffffffff
# v15: 0xffffffffffffffffff00ff00ff00ffff
# v12: 0xffffffffffffffffffffffffffffffff
# v8: 0xffffffffffffffff0000000000000000
@@ -469,11 +469,11 @@
# v22: 0xffffffffffffffffffffffffffff0000
# v28: 0x0000000000000000ffffffffffffffff
# v6: 0xffffffffffffffffffffffffffffffff
-# v25: 0x000000000000000000000000ffffffff
+# v25: 0x00000000000000000000000000000000
# v21: 0x0000000000000000ffffffffffffffff
-# v16: 0x0000000000000000ffffffff0000ffff
+# v16: 0x0000000000000000ffffffffffffffff
# v23: 0x0000000000000000ffffffffffffffff
-# v7: 0xffffffffffffffff0000000000000000
+# v7: 0xffffffffffffffffffffffff00000000
# v0: 0x00000000000000000000000000000000
# v10: 0x0000000000000000ff00ff00ffff0000
# v21: 0x0000000000000000ffffffffffffffff
@@ -489,9 +489,9 @@
# v12: 0x00000000000000000000000000000000
# v28: 0x0000000000000000000000000000ffff
# v22: 0x00000000000000000000000000000000
-# v5: 0x0000000000000000ffffffffffffffff
+# v5: 0x000000000000000000000000ffffffff
# v13: 0x00000000000000000000000000000000
-# v27: 0x0000000000000000000000ff000000ff
+# v27: 0x0000000000000000ffffffff000000ff
# v5: 0x00000000000000000000000000000000
# v22: 0x00000000000000000000000000000000
# v6: 0x00000000000000000000000000000000
@@ -502,7 +502,7 @@
# v31: 0x00000000000000000000000000000000
# v9: 0x00000000000000000000000000000000
# v7: 0x00000000000000000000000000000000
-# v11: 0x0000000000000000ffffffff00000000
+# v11: 0x0000000000000000ffffffffffff0000
# v1: 0x00000000000000000000000000000000
# v21: 0xffffffffffffffffffffffffffffffff
# v8: 0xffffffffffffffffffffffffffffffff
@@ -523,9 +523,9 @@
# v7: 0x0000000000000000ffffffffffffffff
# v7: 0x0000000000000000ffffffffffffffff
# v25: 0x00000000000000000000000000000000
-# v0: 0x0000000000000000ffffffff00000000
+# v0: 0x0000000000000000ffffffffffff0000
# v24: 0x00000000000000000000000000000000
-# v26: 0x0000000000000000ffffffff00000000
+# v26: 0x0000000000000000ffffffffffff0000
# v1: 0x0000000000000000ffffffffffffffff
# v28: 0x0000000000000000ffffffffffffffff
# v26: 0x00000000000000000000000000000000
@@ -4247,7 +4247,7 @@
# v26: 0x00000000000000000000000000000000
# v21: 0x00000000000000000000000000000000
# v11: 0x00000000000000000000000000000000
-# v8: 0x37a00000000000000000000000000000
+# v8: 0x00000000000000000000000000000000
# v24: 0x0000000000000000377f0000377f0000
# v4: 0x0000000000000000ffffffffffffffff
# v25: 0x00000000000000000000000000000000
@@ -4276,24 +4276,24 @@
# v9: 0x00000000000000000000000000000000
# v30: 0x00000000000000000000000000000000
# v19: 0x00000000000000000000000000000000
-# v15: 0x7ff00000000000007ff8000000000000
+# v15: 0x7ff80000000000007ff8000000000000
# v12: 0x00000000000000007fc000007fc00000
# v19: 0xffffffffffffffffffffffffffffffff
-# v19: 0x37a00000000000000000000000000000
+# v19: 0x00000000000000000000000000000000
# v25: 0x00000000000000007fc000007fc00000
-# v6: 0x7ff00000000000007ff8000000000000
-# v16: 0x37a0000000000000000000000180fe00
+# v6: 0x7ff80000000000007ff8000000000000
+# v16: 0x0000000000000000000000000180fe00
# v15: 0x00000000000000000000000000000000
-# v23: 0x37a0000000000000000000000180fe00
-# v6: 0x000000000000000037a0000000000000
+# v23: 0x0000000000000000000000000180fe00
+# v6: 0x00000000000000000000000000000000
# v27: 0x00000000000000000000000000000000
-# v8: 0x37a00000000000007fc000007fc00000
+# v8: 0x000000000180fe007fc000007fc00000
# v13: 0x0000000000000000ffffffff7fc00000
# v15: 0xffffffff000000000000000000000000
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
# v20: 0x0000000000000000ffffffff00000000
# v18: 0x00000000000000000000000000000000
-# v9: 0x00000000ffffffff37a0000000000000
+# v9: 0x00000000ffffffff000000000180fe00
# v7: 0x0000000000000000ffffffffffffffff
# v18: 0x000000000000000000000000ffffffff
# v31: 0x00000000000000000000000000000000
@@ -4322,14 +4322,14 @@
# v19: 0x00000000000000007fc000007fc00000
# v24: 0x00000000000000000000000000000000
# v16: 0x00000000000000000000000000000000
-# v27: 0x00000000000000000000000037a00000
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
+# v27: 0x00000000000000000000000000000000
# v21: 0x00000000000000000000000000000000
# v5: 0x00000000000000000000000000000000
# v18: 0x000000000000000000000000ffffffff
# v5: 0x00000000000000000000000000000000
# v3: 0x0000000000000000000000007fffffff
-# v27: 0x00000000000000000000000037a00000
+# v27: 0x00000000000000000000000000000000
# v26: 0x00000000000000007fffffff00000000
# v14: 0xbfd6000000000000bfd6000000000000
# v26: 0x00000000000000003f6800003f680000
@@ -4356,7 +4356,7 @@
# v5: 0x80000000800000008000000080000000
# v18: 0x7ff00000000000007ff0000000000000
# v10: 0x00000000000000007f8000007f800000
-# v5: 0x7f8000007f800000474c80007f800000
+# v5: 0x7f8000007f8000007f8000007f800000
# v22: 0x40000000000000004000000000000000
# v31: 0x00000000000000004000000040000000
# v18: 0x40000000400000004000000040000000
@@ -4402,7 +4402,7 @@
# v9: 0x43dfe000001fe0000000000000000000
# v26: 0x000000000000000040fff00000200000
# v11: 0x00000000000000000000000000000000
-# v29: 0x7fc000007fc000000000000000000000
+# v29: 0x00000000000000000000000000000000
# v22: 0x4effe000000000004e001a4000000000
# v18: 0x4207bfc03d7f00000000000000000000
# p8<15:0>: 0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 <- 0x~~~~~~~~~~~~~~~~
@@ -4953,9 +4953,9 @@
# z24<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# z24<127:0>: 0x0000000000000000000000ff00000000
-# z25<127:0>: 0xa000000000000000000000ffc0000000
-# z26<127:0>: 0x3700000000000000000000007f000000
+# z24<127:0>: 0x00000000fe000000000000ff00000000
+# z25<127:0>: 0x0000000080000000000000ffc0000000
+# z26<127:0>: 0x0000000001000000000000007f000000
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
@@ -4966,10 +4966,10 @@
# ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
+# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~
+# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# z24<255:128>: 0x00000000000000000000000000000000
# z25<255:128>: 0x00000000000000000000000000000000
# z26<255:128>: 0x00000000000000000000000000000000
@@ -5094,11 +5094,11 @@
# ╙───────────────────────────── 0xedec'ebea'e9e8 <- 0x~~~~~~~~~~~~~~~~
# z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000)
# z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000)
-# z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
+# z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
# ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
# z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
# z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
@@ -5128,10 +5128,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-# z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000)
-# z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000)
+# z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000)
+# z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000)
# ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
diff --git a/test/test-trace-reference/log-vregs-colour b/test/test-trace-reference/log-vregs-colour
index 31d1c24c..d15dcfec 100644
--- a/test/test-trace-reference/log-vregs-colour
+++ b/test/test-trace-reference/log-vregs-colour
@@ -368,8 +368,8 @@
#  d5:  0x3f90000000000000
#  d16:  0x0000000000000000
#  d15:  0x0000000000000000
-#  v7: 0x00000000000000007ff000004e81442e
-#  v8: 0x0000000000000000c004000000000000
+#  v7: 0x0000000000000000000000004e81442e
+#  v8: 0x00000000000000000000000000000000
#  s29:  0x00000000
#  s15:  0x49800000
#  s27:  0x00000000
@@ -380,14 +380,14 @@
#  d3:  0x0000000000000000
#  d28:  0x41dfffffffc00000
#  d27:  0x0000000000000000
-#  v11: 0x0000000000000000400000004f7fe000
-#  v0: 0x000000000000000043d21c00480a8294
+#  v11: 0x0000000000000000000000004f7fe000
+#  v0: 0x000000000000000000000000480a8294
#  s20:  0x00000000
#  s21:  0x00000000
#  s6:  0x5f000000
#  s7:  0x38ff0000
-#  v19: 0x000000000000000043d21c00480a8294
-#  v16: 0x0000000000000000400000004f7f2000
+#  v19: 0x000000000000000000000000480a8294
+#  v16: 0x0000000000000000000000004f7f2000
#  v0: 0x00000000000000000000000000000000
#  v27: 0x00000000000000000000000000000000
#  v21: 0x00000000000000000000000000000000
@@ -397,59 +397,59 @@
#  v10: 0x00000000000000003f0000007f600000
#  v31: 0x495000018a83940149500000d3029400
#  v10: 0x495000018a83940168a6954c14cfd693
-#  v15: 0x00000000000000006328b14b89d7c527
+#  v15: 0x00000000000000001f56954b89d7c527
#  v27: 0x0000000000000000495000000a029400
#  v25: 0x000000000000000041efffffffc00000
#  v13: 0x00000000000000000000000037feffff
#  v4: 0x00000000000000000000000000000000
-#  v10: 0x000000000000000000000000827f4696
+#  v10: 0x0000000000000000000000003ead2a96
#  v10: 0x00000000000000000000000000003c7f
#  v31: 0x0000000000000000000000000000ffff
#  v16: 0x00000000000000000000000000000000
#  v0: 0x00000000000000000000000000000000
#  v31: 0x000000000000b87f000000000000ffff
-#  v14: 0x000000000000000043d21c00480a8294
-#  v3: 0x0000000020febf0000000000c4000000
+#  v14: 0x000000000000000000000000480a8294
+#  v3: 0x0000000020febf000000000000000000
#  v8: 0x000000007f8000003effffffffe00000
#  v22: 0x00000000000000003c7fffff00000000
-#  v29: 0x00000000000000005fd2ca9e00000000
+#  v29: 0x00000000000000000000ca9e00000000
#  v30: 0x0000000000000000000000003c7fffff
#  v12: 0x0000000000000000000037000000bbfe
#  v17: 0x000000003700bbfe00007f803efeffe0
#  v27: 0x00000000000000000000000000000075
#  v12: 0x00000000000000000000000000000000
#  v27: 0x00000000000000000000000000003c7e
-#  v19: 0x00000000000000000000000000002a70
+#  v19: 0x0000000000000000000000000000ca9e
#  v14: 0x00000000000000000000000000003c7e
#  v10: 0x00000000000000000000000000000000
#  v5: 0x00000000000000000000000000000000
-#  v26: 0x0000000020febf0000000000c4000000
+#  v26: 0x0000000020febf000000000000000000
#  v7: 0x000000000000000000000000381b0000
#  v28: 0x000000000000000040dfdcffdcc00000
-#  v29: 0x00000000000000005fd2ca1200000000
+#  v29: 0x00000000000000000000ca1200000000
#  v12: 0x0000000000000000000000000000ffff
#  v18: 0x000000000000000000000000ff67ff67
#  v12: 0x00000000007ebf000000000000000000
#  v2: 0x0000000000000000495000008a828000
-#  v8: 0x000000007f8000003effffffcc000000
+#  v8: 0x000000007f8000003effffffc8000000
#  v5: 0x00000000000000000000000000000000
#  v9: 0x0100000008009801010000000800dc00
-#  v14: 0x000000000000000000000000c4000000
+#  v14: 0x00000000000000000000000000000000
#  v29: 0x07070707070707070707070707070707
#  v21: 0x00000000000000000000001f0000001f
#  v1: 0x0000000000000000000f000f000f000f
#  v27: 0x0000001f0000001f0000001f0000001f
#  v19: 0x00000000000000000707070707070707
-#  v15: 0x000f000f000f000f000f000f0001000f
+#  v15: 0x000f000f000f000f000f000f000f000f
#  v1: 0x08080808080808080808080808080808
#  v27: 0x00000000000000000000001100000002
#  v9: 0x00000000000000000007001000040000
-#  v31: 0x0000000c0000000c0000000c0000000f
+#  v31: 0x0000000c0000000c0000000c0000000c
#  v14: 0x00000000000000000505050505050505
-#  v6: 0x00100010001000100001001000010000
+#  v6: 0x00100010001000100010001000010000
#  v18: 0x00000000000000000000000000000000
#  v14: 0x00000000000000000000000000000000
-#  v19: 0xffffffff000000ff0000000000ffffff
+#  v19: 0xffffffff000000ff00000000ffffffff
#  v15: 0xffffffffffffffffff00ff00ff00ffff
#  v12: 0xffffffffffffffffffffffffffffffff
#  v8: 0xffffffffffffffff0000000000000000
@@ -469,11 +469,11 @@
#  v22: 0xffffffffffffffffffffffffffff0000
#  v28: 0x0000000000000000ffffffffffffffff
#  v6: 0xffffffffffffffffffffffffffffffff
-#  v25: 0x000000000000000000000000ffffffff
+#  v25: 0x00000000000000000000000000000000
#  v21: 0x0000000000000000ffffffffffffffff
-#  v16: 0x0000000000000000ffffffff0000ffff
+#  v16: 0x0000000000000000ffffffffffffffff
#  v23: 0x0000000000000000ffffffffffffffff
-#  v7: 0xffffffffffffffff0000000000000000
+#  v7: 0xffffffffffffffffffffffff00000000
#  v0: 0x00000000000000000000000000000000
#  v10: 0x0000000000000000ff00ff00ffff0000
#  v21: 0x0000000000000000ffffffffffffffff
@@ -489,9 +489,9 @@
#  v12: 0x00000000000000000000000000000000
#  v28: 0x0000000000000000000000000000ffff
#  v22: 0x00000000000000000000000000000000
-#  v5: 0x0000000000000000ffffffffffffffff
+#  v5: 0x000000000000000000000000ffffffff
#  v13: 0x00000000000000000000000000000000
-#  v27: 0x0000000000000000000000ff000000ff
+#  v27: 0x0000000000000000ffffffff000000ff
#  v5: 0x00000000000000000000000000000000
#  v22: 0x00000000000000000000000000000000
#  v6: 0x00000000000000000000000000000000
@@ -502,7 +502,7 @@
#  v31: 0x00000000000000000000000000000000
#  v9: 0x00000000000000000000000000000000
#  v7: 0x00000000000000000000000000000000
-#  v11: 0x0000000000000000ffffffff00000000
+#  v11: 0x0000000000000000ffffffffffff0000
#  v1: 0x00000000000000000000000000000000
#  v21: 0xffffffffffffffffffffffffffffffff
#  v8: 0xffffffffffffffffffffffffffffffff
@@ -523,9 +523,9 @@
#  v7: 0x0000000000000000ffffffffffffffff
#  v7: 0x0000000000000000ffffffffffffffff
#  v25: 0x00000000000000000000000000000000
-#  v0: 0x0000000000000000ffffffff00000000
+#  v0: 0x0000000000000000ffffffffffff0000
#  v24: 0x00000000000000000000000000000000
-#  v26: 0x0000000000000000ffffffff00000000
+#  v26: 0x0000000000000000ffffffffffff0000
#  v1: 0x0000000000000000ffffffffffffffff
#  v28: 0x0000000000000000ffffffffffffffff
#  v26: 0x00000000000000000000000000000000
@@ -4247,7 +4247,7 @@
#  v26: 0x00000000000000000000000000000000
#  v21: 0x00000000000000000000000000000000
#  v11: 0x00000000000000000000000000000000
-#  v8: 0x37a00000000000000000000000000000
+#  v8: 0x00000000000000000000000000000000
#  v24: 0x0000000000000000377f0000377f0000
#  v4: 0x0000000000000000ffffffffffffffff
#  v25: 0x00000000000000000000000000000000
@@ -4276,24 +4276,24 @@
#  v9: 0x00000000000000000000000000000000
#  v30: 0x00000000000000000000000000000000
#  v19: 0x00000000000000000000000000000000
-#  v15: 0x7ff00000000000007ff8000000000000
+#  v15: 0x7ff80000000000007ff8000000000000
#  v12: 0x00000000000000007fc000007fc00000
#  v19: 0xffffffffffffffffffffffffffffffff
-#  v19: 0x37a00000000000000000000000000000
+#  v19: 0x00000000000000000000000000000000
#  v25: 0x00000000000000007fc000007fc00000
-#  v6: 0x7ff00000000000007ff8000000000000
-#  v16: 0x37a0000000000000000000000180fe00
+#  v6: 0x7ff80000000000007ff8000000000000
+#  v16: 0x0000000000000000000000000180fe00
#  v15: 0x00000000000000000000000000000000
-#  v23: 0x37a0000000000000000000000180fe00
-#  v6: 0x000000000000000037a0000000000000
+#  v23: 0x0000000000000000000000000180fe00
+#  v6: 0x00000000000000000000000000000000
#  v27: 0x00000000000000000000000000000000
-#  v8: 0x37a00000000000007fc000007fc00000
+#  v8: 0x000000000180fe007fc000007fc00000
#  v13: 0x0000000000000000ffffffff7fc00000
#  v15: 0xffffffff000000000000000000000000
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
#  v20: 0x0000000000000000ffffffff00000000
#  v18: 0x00000000000000000000000000000000
-#  v9: 0x00000000ffffffff37a0000000000000
+#  v9: 0x00000000ffffffff000000000180fe00
#  v7: 0x0000000000000000ffffffffffffffff
#  v18: 0x000000000000000000000000ffffffff
#  v31: 0x00000000000000000000000000000000
@@ -4322,14 +4322,14 @@
#  v19: 0x00000000000000007fc000007fc00000
#  v24: 0x00000000000000000000000000000000
#  v16: 0x00000000000000000000000000000000
-#  v27: 0x00000000000000000000000037a00000
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
+#  v27: 0x00000000000000000000000000000000
#  v21: 0x00000000000000000000000000000000
#  v5: 0x00000000000000000000000000000000
#  v18: 0x000000000000000000000000ffffffff
#  v5: 0x00000000000000000000000000000000
#  v3: 0x0000000000000000000000007fffffff
-#  v27: 0x00000000000000000000000037a00000
+#  v27: 0x00000000000000000000000000000000
#  v26: 0x00000000000000007fffffff00000000
#  v14: 0xbfd6000000000000bfd6000000000000
#  v26: 0x00000000000000003f6800003f680000
@@ -4356,7 +4356,7 @@
#  v5: 0x80000000800000008000000080000000
#  v18: 0x7ff00000000000007ff0000000000000
#  v10: 0x00000000000000007f8000007f800000
-#  v5: 0x7f8000007f800000474c80007f800000
+#  v5: 0x7f8000007f8000007f8000007f800000
#  v22: 0x40000000000000004000000000000000
#  v31: 0x00000000000000004000000040000000
#  v18: 0x40000000400000004000000040000000
@@ -4402,7 +4402,7 @@
#  v9: 0x43dfe000001fe0000000000000000000
#  v26: 0x000000000000000040fff00000200000
#  v11: 0x00000000000000000000000000000000
-#  v29: 0x7fc000007fc000000000000000000000
+#  v29: 0x00000000000000000000000000000000
#  v22: 0x4effe000000000004e001a4000000000
#  v18: 0x4207bfc03d7f00000000000000000000
#  p8<15:0>: 0b 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 <- 0x~~~~~~~~~~~~~~~~
@@ -4953,9 +4953,9 @@
#  z24<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-#  z24<127:0>: 0x0000000000000000000000ff00000000
-#  z25<127:0>: 0xa000000000000000000000ffc0000000
-#  z26<127:0>: 0x3700000000000000000000007f000000
+#  z24<127:0>: 0x00000000fe000000000000ff00000000
+#  z25<127:0>: 0x0000000080000000000000ffc0000000
+#  z26<127:0>: 0x0000000001000000000000007f000000
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─ 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙─── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ║ ╙───── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
@@ -4966,10 +4966,10 @@
# ║ ║ ║ ║ ║ ║ ╙───────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ║ ╙─────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ║ ║ ╙───────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ║ ║ ║ ╙─────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
+# ║ ║ ║ ╙─────────────────────── 0x01'80'fe <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
# ║ ╙───────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
-# ╙─────────────────────────────── 0x37'a0'00 <- 0x~~~~~~~~~~~~~~~~
+# ╙─────────────────────────────── 0x00'00'00 <- 0x~~~~~~~~~~~~~~~~
#  z24<255:128>: 0x00000000000000000000000000000000
#  z25<255:128>: 0x00000000000000000000000000000000
#  z26<255:128>: 0x00000000000000000000000000000000
@@ -5094,11 +5094,11 @@
# ╙───────────────────────────── 0xedec'ebea'e9e8 <- 0x~~~~~~~~~~~~~~~~
#  z26<127:0>: 0x00000000000000000000ffff00000000 (0.000, 0.000, 9.183e-41, 0.000)
#  z27<127:0>: 0x00000000000000007fff000000000000 (0.000, 0.000, 9.183e-41, 0.000)
-#  z28<127:0>: 0x37a0000000000000000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
+#  z28<127:0>: 0x000000000180fe00000000007fc00000 (0.000, 0.000, 9.183e-41, 0.000)
# ║ ║ ║ ╙─ 0x7fc00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ║ ║ ╙───────── 0x00000000'7fff0000'0000ffff <- 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
#  z26<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
#  z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
#  z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000, 0.000, 0.000)
@@ -5128,10 +5128,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 <- 0x~~~~~~~~~~~~~~~~
#  z27<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-#  z28<127:0>: 0x00000000000000000000ffff7fc00000 (0.000, 0.000)
-#  z29<127:0>: 0x37a0000000000000000000007fff0000 (0.000, 0.000)
+#  z28<127:0>: 0x000000000180fe000000ffff7fc00000 (0.000, 0.000)
+#  z29<127:0>: 0x0000000000000000000000007fff0000 (0.000, 0.000)
# ║ ╙─ 0x000000007fff0000'0000ffff7fc00000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x37a0000000000000'0000000000000000'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x0000000000000000'000000000180fe00'0000000000000000 <- 0x~~~~~~~~~~~~~~~~
#  z27<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z28<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z29<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
diff --git a/test/test-trace-reference/log-write b/test/test-trace-reference/log-write
index fff3143f..0491d505 100644
--- a/test/test-trace-reference/log-write
+++ b/test/test-trace-reference/log-write
@@ -2934,10 +2934,10 @@
# ╙───────────────────────────── 0x0000'0000'0000 -> 0x~~~~~~~~~~~~~~~~
# z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
# z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
-# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000)
+# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000)
# ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
# z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
# z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
@@ -2958,10 +2958,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-# z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000)
+# z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000)
# z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
# z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -3188,3 +3188,196 @@
# z5<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x55 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x55a9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x8d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0xb18d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x95 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x0000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x9d -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~
+# x0<7:0>: 0x00 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<15:0>: 0x0000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<31:0>: 0x00000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
diff --git a/test/test-trace-reference/log-write-colour b/test/test-trace-reference/log-write-colour
index 92faa1fd..7caf1a45 100644
--- a/test/test-trace-reference/log-write-colour
+++ b/test/test-trace-reference/log-write-colour
@@ -2934,10 +2934,10 @@
# ╙───────────────────────────── 0x0000'0000'0000 -> 0x~~~~~~~~~~~~~~~~
#  z6<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
#  z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000, ..., 0.000)
-#  z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000, ..., 0.000)
+#  z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000, ..., 0.000)
# ║ ║ ╙─ 0x7fc00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────────────── 0x37a00000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ║ ╙───────────────── 0x0180fe00'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
#  z6<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
#  z7<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
#  z8<255:128>: 0x00000000000000000000000000000000 (0.000, ..., 0.000, 0.000)
@@ -2958,10 +2958,10 @@
# ║ ╙───────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────────────── 0x00000000'00000000'00000000 -> 0x~~~~~~~~~~~~~~~~
#  z7<127:0>: 0x00000000000000000000000000000000 (0.000, 0.000)
-#  z8<127:0>: 0x37a00000000000007fc000007fc00000 (0.000, 0.000)
+#  z8<127:0>: 0x000000000180fe007fc000007fc00000 (0.000, 0.000)
#  z9<127:0>: 0x43dfe000001fe0000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'7fc000007fc00000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
-# ╙───────────────── 0x43dfe000001fe000'37a0000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+# ╙───────────────── 0x43dfe000001fe000'000000000180fe00'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
#  z7<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z8<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
#  z9<255:128>: 0x00000000000000000000000000000000 (0.000, 0.000)
@@ -3188,3 +3188,196 @@
#  z5<639:512>: 0x00000000000000000000000000000000 (0.000, 0.000)
# ║ ╙─ 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
# ╙───────────────── 0x0000000000000000'0000000000000000'0000000000000000'0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x5555555555555555 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x55 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x55a9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x555556f9 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x8d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x8d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0xb18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0xb18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x5555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x555555555555b18d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x95 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000003469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000469d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0003ff9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x0000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x000000000000009d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x9d -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x00 -> 0x~~~~~~~~~~~~~~~~
+#  x0<7:0>:  0x00 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<15:0>:  0x0000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<31:0>:  0x00000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
+#  x0<63:0>: 0x0000000000000000 -> 0x~~~~~~~~~~~~~~~~
diff --git a/tools/check_recent_coverage.sh b/tools/check_recent_coverage.sh
new file mode 100755
index 00000000..f6168a2e
--- /dev/null
+++ b/tools/check_recent_coverage.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright 2021, VIXL authors
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * Neither the name of ARM Limited nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This code coverage script assumes a Linux-like environment, and has been
+# tested on Ubuntu 18.04.
+
+COVERAGELOG="tools/code_coverage.log"
+MONTHSECONDS=$(( 60*60*24*30 ))
+
+if [ ! -f "$COVERAGELOG" ]; then
+ echo "No code coverage log found."
+ echo "Run tools/code_coverage.sh to generate one."
+ exit 2;
+fi
+
+LASTCOMMIT=`git log -1 --date=format:%s | grep -P "^Date:" | grep -Po "\d+"`
+LASTCOVERAGE=`tail -n1 $COVERAGELOG | cut -d' ' -f1`
+
+d=$(( $LASTCOMMIT - $LASTCOVERAGE ))
+if (( d < $MONTHSECONDS )); then
+ exit 0;
+fi
+
+echo "Code coverage record too old."
+echo "Run tools/code_coverage.sh to generate a newer one."
+exit 1;
diff --git a/tools/clang_tidy.py b/tools/clang_tidy.py
index 7ba18746..8607547b 100755
--- a/tools/clang_tidy.py
+++ b/tools/clang_tidy.py
@@ -143,7 +143,7 @@ def ClangTidyFiles(files, clang_tidy, jobs = 1, progress_prefix = ''):
return -1
opts = ['--', '-DVIXL_INCLUDE_TARGET_AARCH64', '-DVIXL_CODE_BUFFER_MALLOC',
- '-DVIXL_DEBUG','-DVIXL_INCLUDE_SIMLUATOR_AARCH64',
+ '-DVIXL_DEBUG','-DVIXL_INCLUDE_SIMULATOR_AARCH64',
'-DVIXL_INCLUDE_TARGET_A32','-DVIXL_INCLUDE_TARGET_T32',
'-DVIXL_INCLUDE_TARGET_A64']
opts += ['-I%s' % config.dir_src_vixl]
diff --git a/tools/code_coverage.log b/tools/code_coverage.log
new file mode 100644
index 00000000..f19570a8
--- /dev/null
+++ b/tools/code_coverage.log
@@ -0,0 +1,10 @@
+1624976463 83.00% 97.44% 95.16%
+1628075147 83.04% 97.52% 95.33%
+1633016028 83.00% 97.52% 95.32%
+1636647628 82.97% 97.54% 95.28%
+1639684221 82.92% 97.51% 94.06%
+1642688881 82.94% 97.51% 95.27%
+1646150629 82.94% 97.51% 95.36%
+1647535694 82.93% 97.52% 95.36%
+1651138061 82.94% 97.52% 95.36%
+1653484786 82.79% 97.46% 95.51%
diff --git a/tools/code_coverage.sh b/tools/code_coverage.sh
new file mode 100755
index 00000000..5525bb0e
--- /dev/null
+++ b/tools/code_coverage.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# Copyright 2021, VIXL authors
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * Neither the name of ARM Limited nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This code coverage script assumes a Linux-like environment, and has been
+# tested on Ubuntu 18.04.
+
+if ! hash pv 2>/dev/null ; then
+ echo "This script requires 'pv'"
+ echo "On Ubuntu, install it with 'sudo apt-get install pv'"
+ exit 1;
+fi
+
+export CXX=clang++
+export LLVM_PROFILE_FILE=$(mktemp)
+PROFDATA=$(mktemp)
+BUILDDIR="obj/target_a64/mode_debug/symbols_on/compiler_clang++/std_c++14/simulator_aarch64/negative_testing_off/code_buffer_allocator_mmap"
+RUNNER="$BUILDDIR/test/test-runner"
+
+# Build with code coverage instrumentation enabled.
+scons mode=debug coverage=on target=a64 all -j8
+
+if [ ! -f "$RUNNER" ]; then
+ echo "$RUNNER not found."
+ echo "No test-runner for profiling."
+ exit 1;
+fi
+
+# Count the number of tests.
+tests=`$RUNNER --list | wc -l`
+
+# Generate a raw profile for a run using all tests.
+echo "Running $tests tests. This may take a while..."
+$RUNNER --run-all 2>&1 | grep -P "^Running [A-Z0-9]{3,}_" | pv -lbp -w 40 -s $tests >/dev/null
+
+# Process the raw profile data for reporting.
+llvm-profdata merge -sparse $LLVM_PROFILE_FILE -o $PROFDATA
+
+# Print a coverage report for the source files in src/
+REPORT="llvm-cov report $RUNNER -instr-profile=$PROFDATA $BUILDDIR/src/"
+eval $REPORT
+
+# Log the report summary line.
+eval $REPORT | tail -n1 | tr -s " " | cut -d" " -f4,7,10 | xargs -i printf "%s %s\n" `date +%s` {} >>tools/code_coverage.log
+
+# Clean up.
+rm -f $LLVM_PROFILE_FILE
+rm -f $PROFDATA
diff --git a/tools/make_instruction_doc_aarch64.pl b/tools/make_instruction_doc_aarch64.pl
index 4ecb9c69..9ff32f8d 100755
--- a/tools/make_instruction_doc_aarch64.pl
+++ b/tools/make_instruction_doc_aarch64.pl
@@ -35,6 +35,9 @@ my $hfile = "src/aarch64/assembler-aarch64.h";
# Extra pseudo instructions added to AArch64.
my @extras = qw/bind debug dci dc32 dc64 place/;
+# SVE instructions that can't be inferred from their argument types.
+my @sves = qw/addvl addpl rdvl cntb cnth cntw cntd ctermeq ctermne setffr/;
+
my %inst = (); # Global hash of instructions.
# Set record separator to one or more consecutive new lines. This causes $_ to
@@ -45,7 +48,7 @@ open(IN, "<$hfile") or die("Can't open header file $hfile.\n");
while(<IN>)
{
# Find a function formatted like an instruction.
- if(my($t) = /^ ((?:void|inline void) [a-z][a-z0-9]{0,8}_?)\(/mgp)
+ if(my($t) = /^ ((?:void|inline void) [a-z][a-z0-9]{0,9}_?)\(/mgp)
{
# Everything before the function match, ie. the comments.
my $before = ${^PREMATCH};
@@ -55,7 +58,7 @@ while(<IN>)
my $after = ${^POSTMATCH};
# Extract the instruction.
- my($i) = $t =~ /(?:void|inline void) ([a-z][a-z0-9]{0,8})/;
+ my($i) = $t =~ /(?:void|inline void) ([a-z][a-z0-9]{0,9})/;
# Extract the comment from before the function. Drop comment characters
# and format the architecture version suffix, if present.
@@ -76,7 +79,13 @@ while(<IN>)
# Establish the type of the instruction.
my $type = 'integer';
- ($p =~ /VRegister/) and $type = 'float';
+ if ($p =~ /([PZ]Register|SVEMemOperand)/) {
+ $type = 'sve';
+ } elsif ($i =~ /[su]?q?(inc|dec)[bhwd]/ || $i ~~ @sves) {
+ $type = 'sve';
+ } elsif ($p =~ /VRegister/) {
+ $type = 'float';
+ }
($i ~~ @extras) and $type = 'pseudo';
# Special case to distinguish dc() the data constant placing function from
@@ -89,10 +98,13 @@ while(<IN>)
$inst{$p}->{'type'} = $type;
$inst{$p}->{'mnemonic'} = $i;
$inst{$p}->{'description'} = $d;
+ $inst{$p}->{'initial'} = substr($i, 0, 1);
}
}
close(IN);
+my $links = get_links_list(\%inst);
+
print <<HEADER;
VIXL Supported Instruction List
===============================
@@ -102,12 +114,45 @@ disassembler and simulator. The simulator may not support all floating point
operations to the precision required by AArch64 - please check the simulator
source code for details.
+#### AArch64 integer instructions ####
+$links->{'integer'}
+
+#### AArch64 floating point and NEON instructions ####
+$links->{'float'}
+
+#### AArch64 Scalable Vector Extension (SVE) instructions ####
+$links->{'sve'}
+
+#### Additional or pseudo instructions ####
+$links->{'pseudo'}
+
+___
+
HEADER
print describe_insts('AArch64 integer instructions', 'integer');
print describe_insts('AArch64 floating point and NEON instructions', 'float');
+print describe_insts('AArch64 Scalable Vector Extension (SVE) instructions', 'sve');
print describe_insts('Additional or pseudo instructions', 'pseudo');
+# Get a hash of links to each initial-letter section of the document, keyed by type.
+sub get_links_list {
+ my $insts = shift;
+ my %initials;
+ foreach my $i (sort(keys(%$insts))) {
+ my $inst = $insts->{$i};
+ $initials{$inst->{type}}->{$inst->{initial}}++;
+ }
+ my %result;
+ foreach my $t (keys(%initials)) {
+ foreach my $i (sort(keys(%{$initials{$t}}))) {
+ push(@{$result{$t}}, "[$i](#$t-$i)");
+ }
+ $result{$t} = join(' ', @{$result{$t}});
+ }
+ return \%result;
+}
+
# Sort instructions by mnemonic and then description.
sub inst_sort
{
@@ -125,9 +170,14 @@ sub describe_insts
$result .= '-' x length($title);
$result .= "\n\n";
+ my $last_initial = '';
foreach my $i (sort inst_sort keys(%inst))
{
next if($inst{$i}->{'type'} ne $type);
+ unless ($last_initial eq $inst{$i}->{'initial'}) {
+ $last_initial = $inst{$i}->{'initial'};
+ $result .= sprintf("<a id=\"%s-%s\">\n\n", lc($type), $last_initial);
+ }
$result .= sprintf("### %s ###\n\n%s\n\n",
uc($inst{$i}->{'mnemonic'}),
$inst{$i}->{'description'});
diff --git a/tools/test.py b/tools/test.py
index 9a081d0c..75c700d3 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -168,6 +168,8 @@ def BuildOptions():
help='Do not run clang-tidy.')
general_arguments.add_argument('--notest', action='store_true',
help='Do not run tests.')
+ general_arguments.add_argument('--nocheck-code-coverage', action='store_true',
+ help='Do not check code coverage results log.')
general_arguments.add_argument('--fail-early', action='store_true',
help='Exit as soon as a test fails.')
general_arguments.add_argument(
@@ -273,6 +275,10 @@ def RunClangTidy(clang_path, jobs):
jobs = jobs,
progress_prefix = 'clang-tidy: ')
+def CheckCodeCoverage():
+ command = ['tools/check_recent_coverage.sh']
+ return RunCommand(command)
+
def BuildAll(build_options, jobs, environment_options):
scons_command = ['scons', '-C', dir_root, 'all', '-j', str(jobs)]
if util.IsCommandAvailable('ccache'):
@@ -359,6 +365,9 @@ if __name__ == '__main__':
if args.under_valgrind:
util.require_program('valgrind')
+ if not args.nocheck_code_coverage:
+ rc.Combine(CheckCodeCoverage())
+
tests = test_runner.TestQueue()
if not args.nolint and not args.dry_run:
rc.Combine(RunLinter(args.jobs))